mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 13:43:47 +08:00
Bio.Align parser/writer for A2M alignment files (#3966)
* test files * test script * add module * add state column annotation * update
This commit is contained in:
130
Bio/Align/a2m.py
Normal file
130
Bio/Align/a2m.py
Normal file
@ -0,0 +1,130 @@
|
||||
# Copyright 2022 by Michiel de Hoon. All rights reserved.
|
||||
#
|
||||
# This file is part of the Biopython distribution and governed by your
|
||||
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
|
||||
# Please see the LICENSE file that should have been included as part of this
|
||||
# package.
|
||||
"""Bio.Align support for A2M files.
|
||||
|
||||
A2M files are alignment files created by align2model or hmmscore in the SAM
|
||||
Sequence Alignment and Modeling Software System.
|
||||
"""
|
||||
from Bio.Align import Alignment
|
||||
from Bio.Align import interfaces
|
||||
from Bio.Seq import Seq
|
||||
from Bio.SeqRecord import SeqRecord
|
||||
from Bio import BiopythonExperimentalWarning
|
||||
|
||||
import warnings
|
||||
|
||||
# Runs when the module is imported: tells the user that Bio.Align.a2m is
# still experimental. Callers that want to silence it can filter
# BiopythonExperimentalWarning around the import.
warnings.warn(
    "Bio.Align.a2m is an experimental module which may undergo "
    "significant changes prior to its future official release.",
    BiopythonExperimentalWarning,
)
|
||||
|
||||
|
||||
class AlignmentWriter(interfaces.AlignmentWriter):
    """Alignment file writer for the A2M file format."""

    def __init__(self, target):
        """Create an AlignmentWriter object.

        Arguments:
         - target       - output stream or file name

        """
        super().__init__(target, mode="w")

    def format_alignment(self, alignment):
        """Return a string with the alignment in the A2M file format.

        The alignment must carry a per-column annotation
        ``alignment.column_annotations["state"]`` holding one character per
        alignment column: "D" for a match/deletion column and "I" for an
        insertion column.

        For every sequence a header line ``>id description`` is written,
        followed by one line with the aligned sequence:

         - in "D" columns letters are written in upper case and gaps are
           kept as "-";
         - in "I" columns letters are written in lower case and gaps are
           written as ".".

        Raises:
         - TypeError  - if alignment is not an Alignment object
         - ValueError - if the state annotation contains a character other
                        than "D" or "I", or if its length differs from the
                        length of an aligned row

        The returned string does not end with a newline.
        """
        if not isinstance(alignment, Alignment):
            raise TypeError("Expected an Alignment object")
        state = alignment.column_annotations["state"]
        # An unknown state code used to drop the residue in that column
        # silently, producing a shifted (corrupt) row. Refuse it up front.
        unknown = set(state) - {"D", "I"}
        if unknown:
            raise ValueError(
                "Unexpected state(s) %s in column annotation 'state'; "
                "expected 'D' or 'I'" % ", ".join(repr(m) for m in sorted(unknown))
            )
        lines = []
        for sequence, line in zip(alignment.sequences, alignment):
            # zip() below would silently truncate to the shorter of the two.
            if len(line) != len(state):
                raise ValueError(
                    "State annotation has length %d, but the aligned "
                    "sequence has length %d" % (len(state), len(line))
                )
            lines.append(f">{sequence.id} {sequence.description}")
            letters = []
            for c, m in zip(line, state):
                if m == "D":
                    # Match/deletion column: "-" is unaffected by upper().
                    letters.append(c.upper())
                elif c == "-":
                    # Gap aligned to an insertion.
                    letters.append(".")
                else:
                    letters.append(c.lower())
            lines.append("".join(letters))
        return "\n".join(lines)
|
||||
|
||||
|
||||
class AlignmentIterator(interfaces.AlignmentIterator):
    """Alignment iterator for files in the A2M file format.

    An A2M file contains one multiple alignment. Matches are represented by
    upper case letters and deletions by dashes in alignment columns containing
    matches or deletions only. Insertions are represented by lower case letters,
    with gaps aligned to the insertion shown as periods.  Header lines start
    with '>' followed by the name of the sequence, and optionally a description.
    """

    def __init__(self, source):
        """Create an AlignmentIterator object.

        Arguments:
         - source   - input data or file name

        """
        super().__init__(source, mode="t", fmt="A2M")

    def parse(self, stream):
        """Parse the next alignment from the stream.

        Yields a single Alignment object. Its sequences are SeqRecord
        objects holding the ungapped, upper-cased sequences, and its
        ``column_annotations["state"]`` is a string with one character per
        column: "D" for match/deletion columns, "I" for insertion columns.
        The state of each column is taken from the first sequence; all
        other sequences are checked against it.

        Raises ValueError if the stream contains no sequences, if sequence
        data appear before the first header line, if a header line has no
        name, if an aligned sequence contains an unexpected character, or
        if the aligned sequences are inconsistent with each other.
        """
        names = []
        descriptions = []
        chunks = []  # per sequence, the list of stripped data lines
        for line in stream:
            if line.startswith(">"):
                parts = line[1:].rstrip().split(None, 1)
                if not parts:
                    raise ValueError("Missing sequence name in header line")
                names.append(parts[0])
                # The description is optional; None means "not given".
                descriptions.append(parts[1] if len(parts) == 2 else None)
                chunks.append([])
            else:
                data = line.strip()
                if not data:
                    continue
                if not chunks:
                    raise ValueError(
                        "Expected a header line starting with '>' "
                        "before the sequence data"
                    )
                chunks[-1].append(data)
        if not chunks:
            raise ValueError("Empty file.")
        lines = ["".join(chunk) for chunk in chunks]
        # The first sequence defines the state of every column.
        states = []
        for c in lines[0]:
            if c == "-" or c.isupper():
                states.append("D")  # Match/deletion state
            elif c == "." or c.islower():
                states.append("I")  # Insertion state
            else:
                raise ValueError("Unexpected letter '%s' in alignment" % c)
        state = "".join(states)
        # Validate the remaining sequences with real exceptions; assert
        # statements would be skipped when Python runs with -O.
        for name, line in zip(names[1:], lines[1:]):
            if len(line) != len(state):
                raise ValueError(
                    "Sequence '%s' has %d alignment columns (expected %d)"
                    % (name, len(line), len(state))
                )
            for c, m in zip(line, state):
                if m == "D":  # Match/deletion state
                    consistent = c == "-" or c.isupper()
                else:  # Insertion state
                    consistent = c == "." or c.islower()
                if not consistent:
                    raise ValueError(
                        "Unexpected letter '%s' in alignment of sequence '%s'"
                        % (c, name)
                    )
        # From here on both kinds of gap are "-" and all letters upper case.
        lines = [line.upper().replace(".", "-") for line in lines]
        coordinates = Alignment.infer_coordinates(lines)
        records = []
        for name, description, line in zip(names, descriptions, lines):
            sequence = Seq(line.replace("-", ""))
            if description is None:
                record = SeqRecord(sequence, name)
            else:
                record = SeqRecord(sequence, name, description=description)
            records.append(record)
        alignment = Alignment(records, coordinates)
        alignment.column_annotations = {"state": state}
        yield alignment
|
16
Tests/Clustalw/clustalw.a2m
Normal file
16
Tests/Clustalw/clustalw.a2m
Normal file
@ -0,0 +1,16 @@
|
||||
>gi|4959044|gb|AAD34209.1|AF069
|
||||
MENSDSNDKGSDQSAAQRRSQMDRLDREEAFYQFVNNLSEEDYRLMRDNNLLGTPGESTEEELLRRLQQIKEGPPPQSPDENRAGESSDDVTNSDSIIDW
|
||||
LNSVRQTGNTTRSRQRGNQSWRAVSRTNPNSGDFRFSLEINVNRNNGSQTSENESEPSTRRLSVENMESSSQRQMENSASESASARPSRAERNSTEAVTE
|
||||
VPTTRAQRRARSRSPEHRRTRARAERSMSPLQPTSEIPRRAPTLEQSSENEPEGSSRTRHHVTLRQQISGPELLGRGLFAASGSRNPSQGTSSSDTGSNS
|
||||
ESSGSGQRPPTIVLDLQVRRVRPGEYRQRDSIASRTRSRSQAPNNTVTYESERGGFRRTFSRSERAGVRTYVSTIRIPIRRILNTGLSETTSVAIQTMLR
|
||||
QIMTGFGELSYFMYSDSDSEPSASVSSRNVERVESRNGRGSSGGGNSSGSSSSSSPSPSSSGESSESSSKMFEGSSEGGSSGPSRKDGRHRAPVTFDESG
|
||||
SLPFFSLAQFFLLNEDDEDQPRGLTKEQIDNLAMRSFGENDALKTCSVCITEYTEGDKLRKLPCSHEFHVHCIDRWLSE.NSTCPICRRAVLSSGNRESV
|
||||
V
|
||||
>gi|671626|emb|CAA85685.1|
|
||||
---------MSPQTETKASVGFKAGVKEYKLTYYTPEYETKDTDILAAFRVTPQPG-----------------VPPEEAGAAVAAESSTGT---------
|
||||
WTTVWTDGLTSLDRYKG-----RCYHIEPVPG-------------------EKDQCICYVAYPLDLFEEGSVTNMFTSIVGNVFGFKALRALRLEDLRIP
|
||||
VAYVKTFQGPPHGIQVERDKLNKYGRPLLGCTIKPKLGLSAKNYGRAVYECLRGGLDFTKDDENVNSQPFMRWRDRFLFCAEAIYKAQAETGEIKGHYLN
|
||||
ATAG-----------------------TCEEMIKRAIFARELGVPIVMHDYLTGGFTANTSLAHYCRDNGLLLHIHRAMHAVIDRQKNHGMHFRVLAKAL
|
||||
RLSGGDHIHSGTVVGKLEGERDITLGFVDLLRDDFIEKDRSRGIYFTQDWVSLPGVIPVASG-----------------------------GIHVWHMPA
|
||||
LTEIFGDDSVLQFGGGTLGHPWGNAPGAVANRVA-----------VEACVKARNEG---RDLAAEGNAIIREACKWSPElAAACEVWKEIKFEFPAMD--
|
||||
-
|
4
Tests/Clustalw/kalign.a2m
Normal file
4
Tests/Clustalw/kalign.a2m
Normal file
@ -0,0 +1,4 @@
|
||||
>Test1seq
|
||||
GCTGGGGATGGAGAGGGAACAGAGT.T
|
||||
>AT3G20900
|
||||
GCTGGGGATGGAGAGGGAACAGAGTaG
|
32
Tests/Clustalw/msaprobs.a2m
Normal file
32
Tests/Clustalw/msaprobs.a2m
Normal file
@ -0,0 +1,32 @@
|
||||
>V_Harveyi_PATH
|
||||
MKNW........IKV....AVAAI.A..LSAA...................TVQAATEVKVGMSGRYFPFTFVK..QDKLQGFEVDMWDEIGKRNDYKIE
|
||||
YVTANFSGLFGLLETGRIDTISNQITMTDARKAKYLFADPYVVDGAQITVRK.GNDSIQGVEDLAGKTVAVNLGSNFEQLLRDYDKDGKINIKTYDT..G
|
||||
IEHDVALGRADAFIMDRLSALE.LIKKTG.LPLQLAGEPFE.....TIQNAWPFVDNEKGRKLQAEVNKALAEMRADGTVEKISVKWFGADITK....
|
||||
>B_subtilis_YXEM
|
||||
MKMKkw......TVL....VVAALlA.vLSACgn............g.nssSKEDDNVLHVGATGQSYPFAYKE..NGKLTGFDVEVMEAVAKKIDMKLD
|
||||
WKLLEFSGLMGELQTGKLDTISNQVAVTDERKETYNFTKPYAYAGTQIVVKK.DNTDIKSVDDLKGKTVAAVLGSNHAKNLESKDPDKKINIKTYETqeG
|
||||
TLKDVAYGRVDAYVNSRTVLIA.QIKKTG.LPLKLAGDPIV.....YEQVAFPFAKDDAHDKLRKKVNKALDELRKDGTLKKLSEKYFNEDITVeqkh
|
||||
>FLIY_ECOLI
|
||||
MKLAhlgrqalmGVM....AVALVaG..MSVKsf.........adeg.llnKVKERGTLLVGLEGTYPPFSFQGd.DGKLTGFEVEFAQQLAKHLGVEAS
|
||||
LKPTKWDGMLASLDSKRIDVVINQVTISDERKKKYDFSTPYTISGIQALVKKgNEGTIKTADDLKGKKVGVGLGTNYEEWLRQN--VQGVDVRTYDDdpT
|
||||
KYQDLRVGRIDAILVDRLAALD.LVKKTN.DTLAVTGEAFS.....RQESGVALRK--GNEDLLKAVNDAIAEMQKDGTLQALSEKWFGADVTK....
|
||||
>Deinococcus_radiodurans
|
||||
MKKSll......SLKlsglLVPSVlAlsLSACss...............psSTLNQGTLKIAMEGTYPPFTSKNe.QGELVGFDVDIAKAVAQKLNLKPE
|
||||
FVLTEWSGILAGLQANKYDVIVNQVGITPERQNSIGFSQPYAYSRPEIIVAKnNTFNPQSLADLKGKRVGSTLGSNYEKQLI-D--TGDIKIVTYPGapE
|
||||
ILADLVAGRIDAAYNDRLVVNY.IIND-QkLPVRGAGQIGD.....AAPVGIALKK--GNSALKDQIDKALTEMRSDGTFEKISQKWFGQDVGQ...p
|
||||
>B_subtilis_GlnH_homo_YCKK
|
||||
MKKAll......ALF....MVVSIaA..LAACgagndnqskdnakdgdlwaSIKKKGVLTVGTEGTYEPFTYHDkdTDKLTGYDVEVITEVAKRLGLKVD
|
||||
FKETQWGSMFAGLNSKRFDVVANQVGKTD-REDKYDFSDKYTTSRAVVVTKK.DNNDIKSEADVKGKTSAQSLTSNYNKLAT-N--A-GAKVEGVEGmaQ
|
||||
ALQMIQQARVDMTYNDKLAVLN.YLKTSGnKNVKIAFETGE.....PQSTYFTFRK--GSGEVVDQVNKALKEMKEDGTLSKISKKWFGEDVSK....
|
||||
>YA80_HAEIN
|
||||
MKKLlf......TTA....LLTGAiA..FSTFs...........hageiadRVEKTKTLLVGTEGTYAPFTFHDk.SGKLTGFDVEVIRKVAEKLGLKVE
|
||||
FKETQWDAMYAGLNAKRFDVIANQTNPSPERLKKYSFTTPYNYSGGVIVTKS.SDNSIKSFEDLKGRKSAQSATSNWGKDAK-A--A-GAQILVVDGlaQ
|
||||
SLELIKQGRAEATINDKLAVLD.YFKQHPnSGLKIAYDRGD.....KTPTAFAFLQ--GEDALITKFNQVLEALRQDGTLKQISIEWFGYDITQ....
|
||||
>E_coli_GlnH
|
||||
MKSVl.......KVS....LAALTlA..FAVSsh.........a.......---ADKKLVVATDTAFVPFEFKQ..GDKYVGFDVDLWAAIAKELKLDYE
|
||||
LKPMDFSGIIPALQTKNVDLALAGITITDERKKAIDFSDGYYKSGLLVMVKAn-NNDVKSVKDLDGKVVAVKSGTGSVDYAKAN--IKTKDLRQFPNidN
|
||||
AYMELGTNRADAVLHDTPNILY.FIKTAGnGQFKAVGDSLE.....AQQYGIAFPK--GSDELRDKVNGALKTLRENGTYNEIYKKWFGTEP-K....
|
||||
>HISJ_E_COLI
|
||||
MKKLvl......SLS....LV---lA..FSSAta...............a.FAAIPQNIRIGTDPTYAPFESKNs.QGELVGFDIDLAKELCKRINTQCT
|
||||
FVENPLDALIPSLKAKKIDAIMSSLSITEKRQQEIAFTDKLYAADSRLVVAK.NSDIQPTVESLKGKRVGVLQGTTQETFGNEHWAPKGIEIVSYQGqdN
|
||||
IYSDLTAGRIDAAFQDEVAASEgFLKQPVgKDYKFGGPSVKdeklfGVGTGMGLRK--EDNELREALNKAFAEMRADGTYEKLAKKYFDFDVYG...g
|
24
Tests/Clustalw/muscle.a2m
Normal file
24
Tests/Clustalw/muscle.a2m
Normal file
@ -0,0 +1,24 @@
|
||||
>Test1seq
|
||||
.................................................................AGTTACAATAACTGACGAAGCTAAGTAGGCTACTA
|
||||
ATTAACGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGTAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATC
|
||||
GTATTCCGGTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAATAAATTAGCGCCAAAATAATGAAAAAAATAATAACAAACA
|
||||
AAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGCTGAATTTTGATCACCTGCTA
|
||||
ATACTACTTTCTGTATTGATCCTATATCAACGTAAACAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTTCGTTGACGAAGAAGAGCTC
|
||||
TATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACGGTCGCTAGAGAAACTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCC
|
||||
GGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCGTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTT.
|
||||
>AT3G20900.1-SEQ
|
||||
atgaacaaagtagcgaggaagaacaaaacatcaggtgaacaaaaaaaaaactcaatccacatcaaAGTTACAATAACTGACGAAGCTAAGTAGGCTAGAA
|
||||
ATTAAAGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGCAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATA
|
||||
GTTGATTTTTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAAAAAACAAGCGCCAAAATAATGAAAAAAATAATAACAAAAA
|
||||
CAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGATGAATTTTGATCACCTGCTA
|
||||
ATACTACTTTCTGTATTGATCCTATATCAAAAAAAAAAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTGCGTTGACGAAGAAGAGCTC
|
||||
TATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCC
|
||||
GGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAg
|
||||
>AT3G20900.1-CDS
|
||||
.................................................................-----------------------------------
|
||||
----------------------------------------------------------------------------------------------------
|
||||
------------------------------------------------------------------------------ATGAACAAAGTAGCGAGGAAGA
|
||||
A------------------------------CAAAACATC------------------------------------------------------------
|
||||
----------------------------------------------------------------------------------------------------
|
||||
------------AGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCC
|
||||
GGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAg
|
15
Tests/Clustalw/probcons.a2m
Normal file
15
Tests/Clustalw/probcons.a2m
Normal file
@ -0,0 +1,15 @@
|
||||
>plas_horvu
|
||||
D.VLLGANGGVLVFEPNDFSVKAGETITFKNNAGYPHNVVFDEDAVPSG.VD.VSKISQEEYLTAPGETFSVTLTV...PGTYGFYCEPHAGAGMVGKVT
|
||||
V
|
||||
>plas_chlre
|
||||
-.VKLGADSGALEFVPKTLTIKSGETVNFVNNAGFPHNIVFDEDAIPSG.VN.ADAISRDDYLNAPGETYSVKLTA...AGEYGYYCEPHQGAGMVGKII
|
||||
V
|
||||
>plas_anava
|
||||
-.VKLGSDKGLLVFEPAKLTIKPGDTVEFLNNKVPPHNVVFDAALNPAKsADlAKSLSHKQLLMSPGQSTSTTFPAdapAGEYTFYCEPHRGAGMVGKIT
|
||||
V
|
||||
>plas_proho
|
||||
VqIKMGTDKYAPLYEPKALSISAGDTVEFVMNKVGPHNVIFDK--VPAG.ES.APALSNTKLRIAPGSFYSVTLGT...PGTYSFYCTPHRGAGMVGTIT
|
||||
V
|
||||
>azup_achcy
|
||||
VhMLNKGKDGAMVFEPASLKVAPGDTVTFIPTDK-GHNVETIKGMIPDG.AE.A-------FKSKINENYKVTFTA...PGVYGVKCTPHYGMGMVGVVE
|
||||
V
|
321
Tests/test_Align_a2m.py
Normal file
321
Tests/test_Align_a2m.py
Normal file
@ -0,0 +1,321 @@
|
||||
# Copyright 2006-2014 by Peter Cock. All rights reserved.
|
||||
# Copyright 2022 by Michiel de Hoon. All rights reserved.
|
||||
# This code is part of the Biopython distribution and governed by its
|
||||
# license. Please see the LICENSE file that should have been included
|
||||
# as part of this package.
|
||||
"""Tests for Bio.Align.a2m module."""
|
||||
import unittest
|
||||
import warnings
|
||||
|
||||
from io import StringIO
|
||||
|
||||
from Bio import BiopythonExperimentalWarning
|
||||
|
||||
# Import the module under test with BiopythonExperimentalWarning ignored;
# the filter change is undone when the catch_warnings() context exits.
with warnings.catch_warnings():
    warnings.simplefilter("ignore", BiopythonExperimentalWarning)
    from Bio.Align.a2m import AlignmentIterator
    from Bio.Align.a2m import AlignmentWriter
|
||||
|
||||
|
||||
class TestA2MReadingWriting(unittest.TestCase):
|
||||
def check_reading_writing(self, path):
    """Check that the A2M file at path survives a write/read round trip.

    The alignment is parsed from path, written to an in-memory stream,
    parsed again from that stream, and compared with a fresh parse of
    path: number of rows, sequence ids, sequences, aligned rows, and the
    "state" column annotation must all agree. The saved stream must
    contain exactly one alignment.
    """
    alignments = AlignmentIterator(path)
    stream = StringIO()
    writer = AlignmentWriter(stream)
    n = writer.write_file(alignments, mincount=1, maxcount=1)
    self.assertEqual(n, 1)
    alignments = AlignmentIterator(path)
    alignment = next(alignments)
    stream.seek(0)
    saved_alignments = AlignmentIterator(stream)
    saved_alignment = next(saved_alignments)
    with self.assertRaises(StopIteration):
        next(saved_alignments)
    self.assertEqual(len(alignment), len(saved_alignment))
    for i, (sequence, saved_sequence) in enumerate(
        zip(alignment.sequences, saved_alignment.sequences)
    ):
        self.assertEqual(sequence.id, saved_sequence.id)
        self.assertEqual(sequence.seq, saved_sequence.seq)
        self.assertEqual(alignment[i], saved_alignment[i])
    # The writer chooses upper/lower case and "-"/"." from the state
    # annotation, so the round trip must preserve it as well.
    self.assertEqual(
        alignment.column_annotations["state"],
        saved_alignment.column_annotations["state"],
    )
|
||||
|
||||
def test_clustalw(self):
|
||||
path = "Clustalw/clustalw.a2m"
|
||||
with open(path) as stream:
|
||||
alignments = AlignmentIterator(stream)
|
||||
alignment = next(alignments)
|
||||
with self.assertRaises(StopIteration):
|
||||
next(alignments)
|
||||
self.assertEqual(
|
||||
repr(alignment),
|
||||
"<Bio.Align.Alignment object (2 rows x 601 columns) at 0x%x>"
|
||||
% id(alignment),
|
||||
)
|
||||
self.assertEqual(len(alignment), 2)
|
||||
self.assertEqual(alignment.sequences[0].id, "gi|4959044|gb|AAD34209.1|AF069")
|
||||
self.assertEqual(alignment.sequences[1].id, "gi|671626|emb|CAA85685.1|")
|
||||
self.assertEqual(
|
||||
alignment.sequences[0].seq,
|
||||
"MENSDSNDKGSDQSAAQRRSQMDRLDREEAFYQFVNNLSEEDYRLMRDNNLLGTPGESTEEELLRRLQQIKEGPPPQSPDENRAGESSDDVTNSDSIIDWLNSVRQTGNTTRSRQRGNQSWRAVSRTNPNSGDFRFSLEINVNRNNGSQTSENESEPSTRRLSVENMESSSQRQMENSASESASARPSRAERNSTEAVTEVPTTRAQRRARSRSPEHRRTRARAERSMSPLQPTSEIPRRAPTLEQSSENEPEGSSRTRHHVTLRQQISGPELLGRGLFAASGSRNPSQGTSSSDTGSNSESSGSGQRPPTIVLDLQVRRVRPGEYRQRDSIASRTRSRSQAPNNTVTYESERGGFRRTFSRSERAGVRTYVSTIRIPIRRILNTGLSETTSVAIQTMLRQIMTGFGELSYFMYSDSDSEPSASVSSRNVERVESRNGRGSSGGGNSSGSSSSSSPSPSSSGESSESSSKMFEGSSEGGSSGPSRKDGRHRAPVTFDESGSLPFFSLAQFFLLNEDDEDQPRGLTKEQIDNLAMRSFGENDALKTCSVCITEYTEGDKLRKLPCSHEFHVHCIDRWLSENSTCPICRRAVLSSGNRESVV",
|
||||
)
|
||||
self.assertEqual(
|
||||
alignment.sequences[1].seq,
|
||||
"MSPQTETKASVGFKAGVKEYKLTYYTPEYETKDTDILAAFRVTPQPGVPPEEAGAAVAAESSTGTWTTVWTDGLTSLDRYKGRCYHIEPVPGEKDQCICYVAYPLDLFEEGSVTNMFTSIVGNVFGFKALRALRLEDLRIPVAYVKTFQGPPHGIQVERDKLNKYGRPLLGCTIKPKLGLSAKNYGRAVYECLRGGLDFTKDDENVNSQPFMRWRDRFLFCAEAIYKAQAETGEIKGHYLNATAGTCEEMIKRAIFARELGVPIVMHDYLTGGFTANTSLAHYCRDNGLLLHIHRAMHAVIDRQKNHGMHFRVLAKALRLSGGDHIHSGTVVGKLEGERDITLGFVDLLRDDFIEKDRSRGIYFTQDWVSLPGVIPVASGGIHVWHMPALTEIFGDDSVLQFGGGTLGHPWGNAPGAVANRVAVEACVKARNEGRDLAAEGNAIIREACKWSPELAAACEVWKEIKFEFPAMD",
|
||||
)
|
||||
self.assertEqual(
|
||||
alignment[0],
|
||||
"MENSDSNDKGSDQSAAQRRSQMDRLDREEAFYQFVNNLSEEDYRLMRDNNLLGTPGESTEEELLRRLQQIKEGPPPQSPDENRAGESSDDVTNSDSIIDWLNSVRQTGNTTRSRQRGNQSWRAVSRTNPNSGDFRFSLEINVNRNNGSQTSENESEPSTRRLSVENMESSSQRQMENSASESASARPSRAERNSTEAVTEVPTTRAQRRARSRSPEHRRTRARAERSMSPLQPTSEIPRRAPTLEQSSENEPEGSSRTRHHVTLRQQISGPELLGRGLFAASGSRNPSQGTSSSDTGSNSESSGSGQRPPTIVLDLQVRRVRPGEYRQRDSIASRTRSRSQAPNNTVTYESERGGFRRTFSRSERAGVRTYVSTIRIPIRRILNTGLSETTSVAIQTMLRQIMTGFGELSYFMYSDSDSEPSASVSSRNVERVESRNGRGSSGGGNSSGSSSSSSPSPSSSGESSESSSKMFEGSSEGGSSGPSRKDGRHRAPVTFDESGSLPFFSLAQFFLLNEDDEDQPRGLTKEQIDNLAMRSFGENDALKTCSVCITEYTEGDKLRKLPCSHEFHVHCIDRWLSE-NSTCPICRRAVLSSGNRESVV",
|
||||
)
|
||||
self.assertEqual(
|
||||
alignment[1],
|
||||
"---------MSPQTETKASVGFKAGVKEYKLTYYTPEYETKDTDILAAFRVTPQPG-----------------VPPEEAGAAVAAESSTGT---------WTTVWTDGLTSLDRYKG-----RCYHIEPVPG-------------------EKDQCICYVAYPLDLFEEGSVTNMFTSIVGNVFGFKALRALRLEDLRIPVAYVKTFQGPPHGIQVERDKLNKYGRPLLGCTIKPKLGLSAKNYGRAVYECLRGGLDFTKDDENVNSQPFMRWRDRFLFCAEAIYKAQAETGEIKGHYLNATAG-----------------------TCEEMIKRAIFARELGVPIVMHDYLTGGFTANTSLAHYCRDNGLLLHIHRAMHAVIDRQKNHGMHFRVLAKALRLSGGDHIHSGTVVGKLEGERDITLGFVDLLRDDFIEKDRSRGIYFTQDWVSLPGVIPVASG-----------------------------GIHVWHMPALTEIFGDDSVLQFGGGTLGHPWGNAPGAVANRVA-----------VEACVKARNEG---RDLAAEGNAIIREACKWSPELAAACEVWKEIKFEFPAMD---",
|
||||
)
|
||||
self.assertEqual(
|
||||
alignment.column_annotations["state"],
|
||||

|
||||
)
|
||||
self.check_reading_writing(path)
|
||||
|
||||
def test_msaprobs(self):
    """Parse the eight-sequence msaprobs example and verify its contents."""
    path = "Clustalw/msaprobs.a2m"
    # This example was obtained from
    # http://virgil.ruc.dk/kurser/Sekvens/Treedraw.htm
    # and converted to A2M format.
    expected_ids = (
        "V_Harveyi_PATH",
        "B_subtilis_YXEM",
        "FLIY_ECOLI",
        "Deinococcus_radiodurans",
        "B_subtilis_GlnH_homo_YCKK",
        "YA80_HAEIN",
        "E_coli_GlnH",
        "HISJ_E_COLI",
    )
    # Ungapped sequences, in file order.
    expected_sequences = (
        "MKNWIKVAVAAIALSAATVQAATEVKVGMSGRYFPFTFVKQDKLQGFEVDMWDEIGKRNDYKIEYVTANFSGLFGLLETGRIDTISNQITMTDARKAKYLFADPYVVDGAQITVRKGNDSIQGVEDLAGKTVAVNLGSNFEQLLRDYDKDGKINIKTYDTGIEHDVALGRADAFIMDRLSALELIKKTGLPLQLAGEPFETIQNAWPFVDNEKGRKLQAEVNKALAEMRADGTVEKISVKWFGADITK",
        "MKMKKWTVLVVAALLAVLSACGNGNSSSKEDDNVLHVGATGQSYPFAYKENGKLTGFDVEVMEAVAKKIDMKLDWKLLEFSGLMGELQTGKLDTISNQVAVTDERKETYNFTKPYAYAGTQIVVKKDNTDIKSVDDLKGKTVAAVLGSNHAKNLESKDPDKKINIKTYETQEGTLKDVAYGRVDAYVNSRTVLIAQIKKTGLPLKLAGDPIVYEQVAFPFAKDDAHDKLRKKVNKALDELRKDGTLKKLSEKYFNEDITVEQKH",
        "MKLAHLGRQALMGVMAVALVAGMSVKSFADEGLLNKVKERGTLLVGLEGTYPPFSFQGDDGKLTGFEVEFAQQLAKHLGVEASLKPTKWDGMLASLDSKRIDVVINQVTISDERKKKYDFSTPYTISGIQALVKKGNEGTIKTADDLKGKKVGVGLGTNYEEWLRQNVQGVDVRTYDDDPTKYQDLRVGRIDAILVDRLAALDLVKKTNDTLAVTGEAFSRQESGVALRKGNEDLLKAVNDAIAEMQKDGTLQALSEKWFGADVTK",
        "MKKSLLSLKLSGLLVPSVLALSLSACSSPSSTLNQGTLKIAMEGTYPPFTSKNEQGELVGFDVDIAKAVAQKLNLKPEFVLTEWSGILAGLQANKYDVIVNQVGITPERQNSIGFSQPYAYSRPEIIVAKNNTFNPQSLADLKGKRVGSTLGSNYEKQLIDTGDIKIVTYPGAPEILADLVAGRIDAAYNDRLVVNYIINDQKLPVRGAGQIGDAAPVGIALKKGNSALKDQIDKALTEMRSDGTFEKISQKWFGQDVGQP",
        "MKKALLALFMVVSIAALAACGAGNDNQSKDNAKDGDLWASIKKKGVLTVGTEGTYEPFTYHDKDTDKLTGYDVEVITEVAKRLGLKVDFKETQWGSMFAGLNSKRFDVVANQVGKTDREDKYDFSDKYTTSRAVVVTKKDNNDIKSEADVKGKTSAQSLTSNYNKLATNAGAKVEGVEGMAQALQMIQQARVDMTYNDKLAVLNYLKTSGNKNVKIAFETGEPQSTYFTFRKGSGEVVDQVNKALKEMKEDGTLSKISKKWFGEDVSK",
        "MKKLLFTTALLTGAIAFSTFSHAGEIADRVEKTKTLLVGTEGTYAPFTFHDKSGKLTGFDVEVIRKVAEKLGLKVEFKETQWDAMYAGLNAKRFDVIANQTNPSPERLKKYSFTTPYNYSGGVIVTKSSDNSIKSFEDLKGRKSAQSATSNWGKDAKAAGAQILVVDGLAQSLELIKQGRAEATINDKLAVLDYFKQHPNSGLKIAYDRGDKTPTAFAFLQGEDALITKFNQVLEALRQDGTLKQISIEWFGYDITQ",
        "MKSVLKVSLAALTLAFAVSSHAADKKLVVATDTAFVPFEFKQGDKYVGFDVDLWAAIAKELKLDYELKPMDFSGIIPALQTKNVDLALAGITITDERKKAIDFSDGYYKSGLLVMVKANNNDVKSVKDLDGKVVAVKSGTGSVDYAKANIKTKDLRQFPNIDNAYMELGTNRADAVLHDTPNILYFIKTAGNGQFKAVGDSLEAQQYGIAFPKGSDELRDKVNGALKTLRENGTYNEIYKKWFGTEPK",
        "MKKLVLSLSLVLAFSSATAAFAAIPQNIRIGTDPTYAPFESKNSQGELVGFDIDLAKELCKRINTQCTFVENPLDALIPSLKAKKIDAIMSSLSITEKRQQEIAFTDKLYAADSRLVVAKNSDIQPTVESLKGKRVGVLQGTTQETFGNEHWAPKGIEIVSYQGQDNIYSDLTAGRIDAAFQDEVAASEGFLKQPVGKDYKFGGPSVKDEKLFGVGTGMGLRKEDNELREALNKAFAEMRADGTYEKLAKKYFDFDVYGG",
    )
    # Aligned rows, in file order.
    expected_rows = (
        "MKNW--------IKV----AVAAI-A--LSAA-------------------TVQAATEVKVGMSGRYFPFTFVK--QDKLQGFEVDMWDEIGKRNDYKIEYVTANFSGLFGLLETGRIDTISNQITMTDARKAKYLFADPYVVDGAQITVRK-GNDSIQGVEDLAGKTVAVNLGSNFEQLLRDYDKDGKINIKTYDT--GIEHDVALGRADAFIMDRLSALE-LIKKTG-LPLQLAGEPFE-----TIQNAWPFVDNEKGRKLQAEVNKALAEMRADGTVEKISVKWFGADITK----",
        "MKMKKW------TVL----VVAALLA-VLSACGN------------G-NSSSKEDDNVLHVGATGQSYPFAYKE--NGKLTGFDVEVMEAVAKKIDMKLDWKLLEFSGLMGELQTGKLDTISNQVAVTDERKETYNFTKPYAYAGTQIVVKK-DNTDIKSVDDLKGKTVAAVLGSNHAKNLESKDPDKKINIKTYETQEGTLKDVAYGRVDAYVNSRTVLIA-QIKKTG-LPLKLAGDPIV-----YEQVAFPFAKDDAHDKLRKKVNKALDELRKDGTLKKLSEKYFNEDITVEQKH",
        "MKLAHLGRQALMGVM----AVALVAG--MSVKSF---------ADEG-LLNKVKERGTLLVGLEGTYPPFSFQGD-DGKLTGFEVEFAQQLAKHLGVEASLKPTKWDGMLASLDSKRIDVVINQVTISDERKKKYDFSTPYTISGIQALVKKGNEGTIKTADDLKGKKVGVGLGTNYEEWLRQN--VQGVDVRTYDDDPTKYQDLRVGRIDAILVDRLAALD-LVKKTN-DTLAVTGEAFS-----RQESGVALRK--GNEDLLKAVNDAIAEMQKDGTLQALSEKWFGADVTK----",
        "MKKSLL------SLKLSGLLVPSVLALSLSACSS---------------PSSTLNQGTLKIAMEGTYPPFTSKNE-QGELVGFDVDIAKAVAQKLNLKPEFVLTEWSGILAGLQANKYDVIVNQVGITPERQNSIGFSQPYAYSRPEIIVAKNNTFNPQSLADLKGKRVGSTLGSNYEKQLI-D--TGDIKIVTYPGAPEILADLVAGRIDAAYNDRLVVNY-IIND-QKLPVRGAGQIGD-----AAPVGIALKK--GNSALKDQIDKALTEMRSDGTFEKISQKWFGQDVGQ---P",
        "MKKALL------ALF----MVVSIAA--LAACGAGNDNQSKDNAKDGDLWASIKKKGVLTVGTEGTYEPFTYHDKDTDKLTGYDVEVITEVAKRLGLKVDFKETQWGSMFAGLNSKRFDVVANQVGKTD-REDKYDFSDKYTTSRAVVVTKK-DNNDIKSEADVKGKTSAQSLTSNYNKLAT-N--A-GAKVEGVEGMAQALQMIQQARVDMTYNDKLAVLN-YLKTSGNKNVKIAFETGE-----PQSTYFTFRK--GSGEVVDQVNKALKEMKEDGTLSKISKKWFGEDVSK----",
        "MKKLLF------TTA----LLTGAIA--FSTFS-----------HAGEIADRVEKTKTLLVGTEGTYAPFTFHDK-SGKLTGFDVEVIRKVAEKLGLKVEFKETQWDAMYAGLNAKRFDVIANQTNPSPERLKKYSFTTPYNYSGGVIVTKS-SDNSIKSFEDLKGRKSAQSATSNWGKDAK-A--A-GAQILVVDGLAQSLELIKQGRAEATINDKLAVLD-YFKQHPNSGLKIAYDRGD-----KTPTAFAFLQ--GEDALITKFNQVLEALRQDGTLKQISIEWFGYDITQ----",
        "MKSVL-------KVS----LAALTLA--FAVSSH---------A----------ADKKLVVATDTAFVPFEFKQ--GDKYVGFDVDLWAAIAKELKLDYELKPMDFSGIIPALQTKNVDLALAGITITDERKKAIDFSDGYYKSGLLVMVKAN-NNDVKSVKDLDGKVVAVKSGTGSVDYAKAN--IKTKDLRQFPNIDNAYMELGTNRADAVLHDTPNILY-FIKTAGNGQFKAVGDSLE-----AQQYGIAFPK--GSDELRDKVNGALKTLRENGTYNEIYKKWFGTEP-K----",
        "MKKLVL------SLS----LV---LA--FSSATA---------------A-FAAIPQNIRIGTDPTYAPFESKNS-QGELVGFDIDLAKELCKRINTQCTFVENPLDALIPSLKAKKIDAIMSSLSITEKRQQEIAFTDKLYAADSRLVVAK-NSDIQPTVESLKGKRVGVLQGTTQETFGNEHWAPKGIEIVSYQGQDNIYSDLTAGRIDAAFQDEVAASEGFLKQPVGKDYKFGGPSVKDEKLFGVGTGMGLRK--EDNELREALNKAFAEMRADGTYEKLAKKYFDFDVYG---G",
    )
    expected_state = "DDDDIIIIIIIIDDDIIIIDDDDDIDIIDDDDIIIIIIIIIIIIIIIIIIIDDDDDDDDDDDDDDDDDDDDDDDIIDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDIDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDIIDDDDDDDDDDDDDDDDDDDDDDDIDDDDDDIDDDDDDDDDDDIIIIIDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDIIII"
    with open(path) as stream:
        alignments = AlignmentIterator(stream)
        alignment = next(alignments)
        # The file holds exactly one alignment.
        with self.assertRaises(StopIteration):
            next(alignments)
    self.assertEqual(
        repr(alignment),
        "<Bio.Align.Alignment object (8 rows x 298 columns) at 0x%x>"
        % id(alignment),
    )
    self.assertEqual(len(alignment), 8)
    self.assertEqual(alignment.shape, (8, 298))
    for index, expected_id in enumerate(expected_ids):
        self.assertEqual(alignment.sequences[index].id, expected_id)
    for index, expected_sequence in enumerate(expected_sequences):
        self.assertEqual(alignment.sequences[index].seq, expected_sequence)
    for index, expected_row in enumerate(expected_rows):
        self.assertEqual(alignment[index], expected_row)
    self.assertEqual(alignment.column_annotations["state"], expected_state)
    self.check_reading_writing(path)
|
||||
|
||||
def test_muscle(self):
|
||||
path = "Clustalw/muscle.a2m"
|
||||
with open(path) as stream:
|
||||
alignments = AlignmentIterator(stream)
|
||||
alignment = next(alignments)
|
||||
with self.assertRaises(StopIteration):
|
||||
next(alignments)
|
||||
self.assertEqual(
|
||||
repr(alignment),
|
||||
"<Bio.Align.Alignment object (3 rows x 687 columns) at 0x%x>"
|
||||
% id(alignment),
|
||||
)
|
||||
self.assertEqual(len(alignment), 3)
|
||||
self.assertEqual(alignment.sequences[0].id, "Test1seq")
|
||||
self.assertEqual(alignment.sequences[1].id, "AT3G20900.1-SEQ")
|
||||
self.assertEqual(alignment.sequences[2].id, "AT3G20900.1-CDS")
|
||||
self.assertEqual(
|
||||
alignment.sequences[0].seq,
|
||||
"AGTTACAATAACTGACGAAGCTAAGTAGGCTACTAATTAACGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGTAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATCGTATTCCGGTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAATAAATTAGCGCCAAAATAATGAAAAAAATAATAACAAACAAAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGCTGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGATCCTATATCAACGTAAACAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTTCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACGGTCGCTAGAGAAACTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCGTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTT",
|
||||
)
|
||||
self.assertEqual(
|
||||
alignment.sequences[1].seq,
|
||||
"ATGAACAAAGTAGCGAGGAAGAACAAAACATCAGGTGAACAAAAAAAAAACTCAATCCACATCAAAGTTACAATAACTGACGAAGCTAAGTAGGCTAGAAATTAAAGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGCAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATAGTTGATTTTTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAAAAAACAAGCGCCAAAATAATGAAAAAAATAATAACAAAAACAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGATGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGATCCTATATCAAAAAAAAAAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTGCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAG",
|
||||
)
|
||||
self.assertEqual(
|
||||
alignment.sequences[2].seq,
|
||||
"ATGAACAAAGTAGCGAGGAAGAACAAAACATCAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAG",
|
||||
)
|
||||
self.assertEqual(
|
||||
alignment[0],
|
||||
"-----------------------------------------------------------------AGTTACAATAACTGACGAAGCTAAGTAGGCTACTAATTAACGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGTAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATCGTATTCCGGTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAATAAATTAGCGCCAAAATAATGAAAAAAATAATAACAAACAAAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGCTGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGATCCTATATCAACGTAAACAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTTCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACGGTCGCTAGAGAAACTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCGTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTT-",
|
||||
)
|
||||
self.assertEqual(
|
||||
alignment[1],
|
||||
"ATGAACAAAGTAGCGAGGAAGAACAAAACATCAGGTGAACAAAAAAAAAACTCAATCCACATCAAAGTTACAATAACTGACGAAGCTAAGTAGGCTAGAAATTAAAGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGCAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATAGTTGATTTTTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAAAAAACAAGCGCCAAAATAATGAAAAAAATAATAACAAAAACAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGATGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGATCCTATATCAAAAAAAAAAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTGCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAG",
|
||||
)
|
||||
self.assertEqual(
|
||||
alignment[2],
|
||||
"--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ATGAACAAAGTAGCGAGGAAGAA------------------------------CAAAACATC----------------------------------------------------------------------------------------------------------------------------------------------------------------------------AGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAG",
|
||||
)
|
||||
self.assertEqual(
|
||||
alignment.column_annotations["state"],
|
||||

|
||||
)
|
||||
self.check_reading_writing(path)
|
||||
|
||||
def test_kalign(self):
    """Parse the two-sequence kalign example and verify its contents."""
    path = "Clustalw/kalign.a2m"
    with open(path) as stream:
        alignments = AlignmentIterator(stream)
        alignment = next(alignments)
        # The file holds exactly one alignment.
        with self.assertRaises(StopIteration):
            next(alignments)
    expected_repr = (
        "<Bio.Align.Alignment object (2 rows x 27 columns) at 0x%x>"
        % id(alignment)
    )
    self.assertEqual(repr(alignment), expected_repr)
    self.assertEqual(len(alignment), 2)
    records = alignment.sequences
    self.assertEqual(records[0].id, "Test1seq")
    self.assertEqual(records[1].id, "AT3G20900")
    self.assertEqual(records[0].seq, "GCTGGGGATGGAGAGGGAACAGAGTT")
    self.assertEqual(records[1].seq, "GCTGGGGATGGAGAGGGAACAGAGTAG")
    # The second-to-last column is an insertion in the second sequence.
    self.assertEqual(alignment[0], "GCTGGGGATGGAGAGGGAACAGAGT-T")
    self.assertEqual(alignment[1], "GCTGGGGATGGAGAGGGAACAGAGTAG")
    state = alignment.column_annotations["state"]
    self.assertEqual(state, "DDDDDDDDDDDDDDDDDDDDDDDDDID")
    self.check_reading_writing(path)
|
||||
|
||||
def test_probcons(self):
    """Parse the five-sequence probcons example and verify its contents."""
    path = "Clustalw/probcons.a2m"
    # example taken from the PROBCONS documentation,
    # and converted to aligned A2M format.
    # One (id, ungapped sequence, aligned row) triple per sequence.
    expected = (
        (
            "plas_horvu",
            "DVLLGANGGVLVFEPNDFSVKAGETITFKNNAGYPHNVVFDEDAVPSGVDVSKISQEEYLTAPGETFSVTLTVPGTYGFYCEPHAGAGMVGKVTV",
            "D-VLLGANGGVLVFEPNDFSVKAGETITFKNNAGYPHNVVFDEDAVPSG-VD-VSKISQEEYLTAPGETFSVTLTV---PGTYGFYCEPHAGAGMVGKVTV",
        ),
        (
            "plas_chlre",
            "VKLGADSGALEFVPKTLTIKSGETVNFVNNAGFPHNIVFDEDAIPSGVNADAISRDDYLNAPGETYSVKLTAAGEYGYYCEPHQGAGMVGKIIV",
            "--VKLGADSGALEFVPKTLTIKSGETVNFVNNAGFPHNIVFDEDAIPSG-VN-ADAISRDDYLNAPGETYSVKLTA---AGEYGYYCEPHQGAGMVGKIIV",
        ),
        (
            "plas_anava",
            "VKLGSDKGLLVFEPAKLTIKPGDTVEFLNNKVPPHNVVFDAALNPAKSADLAKSLSHKQLLMSPGQSTSTTFPADAPAGEYTFYCEPHRGAGMVGKITV",
            "--VKLGSDKGLLVFEPAKLTIKPGDTVEFLNNKVPPHNVVFDAALNPAKSADLAKSLSHKQLLMSPGQSTSTTFPADAPAGEYTFYCEPHRGAGMVGKITV",
        ),
        (
            "plas_proho",
            "VQIKMGTDKYAPLYEPKALSISAGDTVEFVMNKVGPHNVIFDKVPAGESAPALSNTKLRIAPGSFYSVTLGTPGTYSFYCTPHRGAGMVGTITV",
            "VQIKMGTDKYAPLYEPKALSISAGDTVEFVMNKVGPHNVIFDK--VPAG-ES-APALSNTKLRIAPGSFYSVTLGT---PGTYSFYCTPHRGAGMVGTITV",
        ),
        (
            "azup_achcy",
            "VHMLNKGKDGAMVFEPASLKVAPGDTVTFIPTDKGHNVETIKGMIPDGAEAFKSKINENYKVTFTAPGVYGVKCTPHYGMGMVGVVEV",
            "VHMLNKGKDGAMVFEPASLKVAPGDTVTFIPTDK-GHNVETIKGMIPDG-AE-A-------FKSKINENYKVTFTA---PGVYGVKCTPHYGMGMVGVVEV",
        ),
    )
    expected_state = "DIDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDIDDIDDDDDDDDDDDDDDDDDDDDDDDIIIDDDDDDDDDDDDDDDDDDDDDD"
    with open(path) as stream:
        alignments = AlignmentIterator(stream)
        alignment = next(alignments)
        # The file holds exactly one alignment.
        with self.assertRaises(StopIteration):
            next(alignments)
    self.assertEqual(
        repr(alignment),
        "<Bio.Align.Alignment object (5 rows x 101 columns) at 0x%x>"
        % id(alignment),
    )
    self.assertEqual(len(alignment), 5)
    for index, (expected_id, _, _) in enumerate(expected):
        self.assertEqual(alignment.sequences[index].id, expected_id)
    for index, (_, expected_sequence, _) in enumerate(expected):
        self.assertEqual(alignment.sequences[index].seq, expected_sequence)
    for index, (_, _, expected_row) in enumerate(expected):
        self.assertEqual(alignment[index], expected_row)
    self.assertEqual(alignment.column_annotations["state"], expected_state)
    self.check_reading_writing(path)
|
||||
|
||||
def test_empty(self):
    """Checking empty file."""
    # Creating the iterator on an empty stream must succeed; the
    # ValueError is expected only when the first alignment is requested.
    alignments = AlignmentIterator(StringIO())
    self.assertRaises(ValueError, next, alignments)
|
||||
|
||||
|
||||
# When run as a script, execute the tests with verbose (per-test) output.
if __name__ == "__main__":
    runner = unittest.TextTestRunner(verbosity=2)
    unittest.main(testRunner=runner)
|
Reference in New Issue
Block a user