mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 21:53:47 +08:00
Bio.Align parser/writer for A2M alignment files (#3966)
* test files * test script * add module * add state column annotation * update
This commit is contained in:
130
Bio/Align/a2m.py
Normal file
130
Bio/Align/a2m.py
Normal file
@ -0,0 +1,130 @@
|
|||||||
|
# Copyright 2022 by Michiel de Hoon. All rights reserved.
|
||||||
|
#
|
||||||
|
# This file is part of the Biopython distribution and governed by your
|
||||||
|
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
|
||||||
|
# Please see the LICENSE file that should have been included as part of this
|
||||||
|
# package.
|
||||||
|
"""Bio.Align support for A2M files.
|
||||||
|
|
||||||
|
A2M files are alignment files created by align2model or hmmscore in the SAM
|
||||||
|
Sequence Alignment and Modeling Software System.
|
||||||
|
"""
|
||||||
|
from Bio.Align import Alignment
|
||||||
|
from Bio.Align import interfaces
|
||||||
|
from Bio.Seq import Seq
|
||||||
|
from Bio.SeqRecord import SeqRecord
|
||||||
|
from Bio import BiopythonExperimentalWarning
|
||||||
|
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
# Executed at import time: importing this module issues a
# BiopythonExperimentalWarning with the message below.
warnings.warn(
    "Bio.Align.a2m is an experimental module which may undergo "
    "significant changes prior to its future official release.",
    BiopythonExperimentalWarning,
)
|
||||||
|
|
||||||
|
|
||||||
|
class AlignmentWriter(interfaces.AlignmentWriter):
    """Alignment file writer for the A2M file format."""

    def __init__(self, target):
        """Create an AlignmentWriter object.

        Arguments:
         - target    - output stream or file name

        """
        super().__init__(target, mode="w")

    def format_alignment(self, alignment):
        """Return a string with the alignment in the A2M file format.

        The per-column state is read from
        ``alignment.column_annotations["state"]``: columns marked "D"
        (match/deletion) are written in upper case, columns marked "I"
        (insertion) are written in lower case, with gaps in insertion
        columns written as periods.

        Arguments:
         - alignment - the Alignment object to format

        Raises TypeError if ``alignment`` is not an Alignment object, and
        ValueError if the state annotation does not have one character per
        alignment column or contains a character other than "D" or "I".
        """
        if not isinstance(alignment, Alignment):
            raise TypeError("Expected an Alignment object")
        state = alignment.column_annotations["state"]
        lines = []
        for sequence, line in zip(alignment.sequences, alignment):
            # zip() below would silently truncate on a length mismatch and
            # write a corrupted row, so check explicitly.
            if len(line) != len(state):
                raise ValueError(
                    "State annotation has length %d, but the alignment has "
                    "%d columns" % (len(state), len(line))
                )
            lines.append(f">{sequence.id} {sequence.description}")
            letters = []
            for c, m in zip(line, state):
                if m == "D":  # Match/deletion state
                    letters.append(c.upper())
                elif m == "I":  # Insertion state
                    letters.append("." if c == "-" else c.lower())
                else:
                    # Previously such columns were silently dropped.
                    raise ValueError(
                        "Unexpected state '%s' in column annotation" % m
                    )
            lines.append("".join(letters))
        return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
class AlignmentIterator(interfaces.AlignmentIterator):
    """Alignment iterator for files in the A2M file format.

    An A2M file contains one multiple alignment. Matches are represented by
    upper case letters and deletions by dashes in alignment columns containing
    matches or deletions only. Insertions are represented by lower case letters,
    with gaps aligned to the insertion shown as periods.  Header lines start
    with '>' followed by the name of the sequence, and optionally a description.
    """

    def __init__(self, source):
        """Create an AlignmentIterator object.

        Arguments:
         - source   - input data or file name

        """
        super().__init__(source, mode="t", fmt="A2M")

    def parse(self, stream):
        """Parse the next alignment from the stream.

        Yields a single Alignment object whose ``column_annotations["state"]``
        is a string with one character per column: "D" for match/deletion
        columns and "I" for insertion columns, as determined from the first
        sequence in the file.

        Raises ValueError if the stream contains no sequences, if sequence
        data precedes the first header line, if a header line has no
        sequence name, or if an aligned sequence is inconsistent with the
        column states derived from the first sequence.
        """
        names = []
        descriptions = []
        rows = []  # one list of text chunks per sequence
        for line in stream:
            if line.startswith(">"):
                parts = line[1:].rstrip().split(None, 1)
                if not parts:
                    raise ValueError("Missing sequence name in header line")
                if len(parts) == 2:
                    name, description = parts
                else:
                    name = parts[0]
                    description = None
                names.append(name)
                descriptions.append(description)
                rows.append([])
            else:
                text = line.strip()
                if not rows:
                    # No header seen yet; tolerate blank lines only.
                    if text:
                        raise ValueError(
                            "Expected a header line starting with '>'"
                        )
                    continue
                rows[-1].append(text)
        if not rows:
            raise ValueError("Empty file.")
        lines = ["".join(chunks) for chunks in rows]
        # The first sequence defines the state of each alignment column.
        states = []
        for c in lines[0]:
            if c == "-" or c.isupper():
                states.append("D")  # Match/deletion state
            elif c == "." or c.islower():
                states.append("I")  # Insertion state
            else:
                raise ValueError("Unexpected letter '%s' in alignment" % c)
        state = "".join(states)
        # All remaining sequences must agree with those column states.
        # Explicit checks are used instead of assert statements, which are
        # removed when Python runs with optimization enabled.
        for name, line in zip(names[1:], lines[1:]):
            if len(line) != len(state):
                raise ValueError(
                    "Sequence '%s' has %d alignment columns (expected %d)"
                    % (name, len(line), len(state))
                )
            for c, m in zip(line, state):
                if m == "D":  # Match/deletion state
                    consistent = c == "-" or c.isupper()
                else:  # Insertion state
                    consistent = c == "." or c.islower()
                if not consistent:
                    raise ValueError(
                        "Unexpected letter '%s' in sequence '%s' for a column "
                        "in state '%s'" % (c, name, m)
                    )
        # Normalize to upper case letters with dashes for all gaps.
        lines = [line.upper().replace(".", "-") for line in lines]
        coordinates = Alignment.infer_coordinates(lines)
        records = []
        for name, description, line in zip(names, descriptions, lines):
            sequence = Seq(line.replace("-", ""))
            if description is None:
                record = SeqRecord(sequence, name)
            else:
                record = SeqRecord(sequence, name, description=description)
            records.append(record)
        alignment = Alignment(records, coordinates)
        alignment.column_annotations = {"state": state}
        yield alignment
|
16
Tests/Clustalw/clustalw.a2m
Normal file
16
Tests/Clustalw/clustalw.a2m
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
>gi|4959044|gb|AAD34209.1|AF069
|
||||||
|
MENSDSNDKGSDQSAAQRRSQMDRLDREEAFYQFVNNLSEEDYRLMRDNNLLGTPGESTEEELLRRLQQIKEGPPPQSPDENRAGESSDDVTNSDSIIDW
|
||||||
|
LNSVRQTGNTTRSRQRGNQSWRAVSRTNPNSGDFRFSLEINVNRNNGSQTSENESEPSTRRLSVENMESSSQRQMENSASESASARPSRAERNSTEAVTE
|
||||||
|
VPTTRAQRRARSRSPEHRRTRARAERSMSPLQPTSEIPRRAPTLEQSSENEPEGSSRTRHHVTLRQQISGPELLGRGLFAASGSRNPSQGTSSSDTGSNS
|
||||||
|
ESSGSGQRPPTIVLDLQVRRVRPGEYRQRDSIASRTRSRSQAPNNTVTYESERGGFRRTFSRSERAGVRTYVSTIRIPIRRILNTGLSETTSVAIQTMLR
|
||||||
|
QIMTGFGELSYFMYSDSDSEPSASVSSRNVERVESRNGRGSSGGGNSSGSSSSSSPSPSSSGESSESSSKMFEGSSEGGSSGPSRKDGRHRAPVTFDESG
|
||||||
|
SLPFFSLAQFFLLNEDDEDQPRGLTKEQIDNLAMRSFGENDALKTCSVCITEYTEGDKLRKLPCSHEFHVHCIDRWLSE.NSTCPICRRAVLSSGNRESV
|
||||||
|
V
|
||||||
|
>gi|671626|emb|CAA85685.1|
|
||||||
|
---------MSPQTETKASVGFKAGVKEYKLTYYTPEYETKDTDILAAFRVTPQPG-----------------VPPEEAGAAVAAESSTGT---------
|
||||||
|
WTTVWTDGLTSLDRYKG-----RCYHIEPVPG-------------------EKDQCICYVAYPLDLFEEGSVTNMFTSIVGNVFGFKALRALRLEDLRIP
|
||||||
|
VAYVKTFQGPPHGIQVERDKLNKYGRPLLGCTIKPKLGLSAKNYGRAVYECLRGGLDFTKDDENVNSQPFMRWRDRFLFCAEAIYKAQAETGEIKGHYLN
|
||||||
|
ATAG-----------------------TCEEMIKRAIFARELGVPIVMHDYLTGGFTANTSLAHYCRDNGLLLHIHRAMHAVIDRQKNHGMHFRVLAKAL
|
||||||
|
RLSGGDHIHSGTVVGKLEGERDITLGFVDLLRDDFIEKDRSRGIYFTQDWVSLPGVIPVASG-----------------------------GIHVWHMPA
|
||||||
|
LTEIFGDDSVLQFGGGTLGHPWGNAPGAVANRVA-----------VEACVKARNEG---RDLAAEGNAIIREACKWSPElAAACEVWKEIKFEFPAMD--
|
||||||
|
-
|
4
Tests/Clustalw/kalign.a2m
Normal file
4
Tests/Clustalw/kalign.a2m
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
>Test1seq
|
||||||
|
GCTGGGGATGGAGAGGGAACAGAGT.T
|
||||||
|
>AT3G20900
|
||||||
|
GCTGGGGATGGAGAGGGAACAGAGTaG
|
32
Tests/Clustalw/msaprobs.a2m
Normal file
32
Tests/Clustalw/msaprobs.a2m
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
>V_Harveyi_PATH
|
||||||
|
MKNW........IKV....AVAAI.A..LSAA...................TVQAATEVKVGMSGRYFPFTFVK..QDKLQGFEVDMWDEIGKRNDYKIE
|
||||||
|
YVTANFSGLFGLLETGRIDTISNQITMTDARKAKYLFADPYVVDGAQITVRK.GNDSIQGVEDLAGKTVAVNLGSNFEQLLRDYDKDGKINIKTYDT..G
|
||||||
|
IEHDVALGRADAFIMDRLSALE.LIKKTG.LPLQLAGEPFE.....TIQNAWPFVDNEKGRKLQAEVNKALAEMRADGTVEKISVKWFGADITK....
|
||||||
|
>B_subtilis_YXEM
|
||||||
|
MKMKkw......TVL....VVAALlA.vLSACgn............g.nssSKEDDNVLHVGATGQSYPFAYKE..NGKLTGFDVEVMEAVAKKIDMKLD
|
||||||
|
WKLLEFSGLMGELQTGKLDTISNQVAVTDERKETYNFTKPYAYAGTQIVVKK.DNTDIKSVDDLKGKTVAAVLGSNHAKNLESKDPDKKINIKTYETqeG
|
||||||
|
TLKDVAYGRVDAYVNSRTVLIA.QIKKTG.LPLKLAGDPIV.....YEQVAFPFAKDDAHDKLRKKVNKALDELRKDGTLKKLSEKYFNEDITVeqkh
|
||||||
|
>FLIY_ECOLI
|
||||||
|
MKLAhlgrqalmGVM....AVALVaG..MSVKsf.........adeg.llnKVKERGTLLVGLEGTYPPFSFQGd.DGKLTGFEVEFAQQLAKHLGVEAS
|
||||||
|
LKPTKWDGMLASLDSKRIDVVINQVTISDERKKKYDFSTPYTISGIQALVKKgNEGTIKTADDLKGKKVGVGLGTNYEEWLRQN--VQGVDVRTYDDdpT
|
||||||
|
KYQDLRVGRIDAILVDRLAALD.LVKKTN.DTLAVTGEAFS.....RQESGVALRK--GNEDLLKAVNDAIAEMQKDGTLQALSEKWFGADVTK....
|
||||||
|
>Deinococcus_radiodurans
|
||||||
|
MKKSll......SLKlsglLVPSVlAlsLSACss...............psSTLNQGTLKIAMEGTYPPFTSKNe.QGELVGFDVDIAKAVAQKLNLKPE
|
||||||
|
FVLTEWSGILAGLQANKYDVIVNQVGITPERQNSIGFSQPYAYSRPEIIVAKnNTFNPQSLADLKGKRVGSTLGSNYEKQLI-D--TGDIKIVTYPGapE
|
||||||
|
ILADLVAGRIDAAYNDRLVVNY.IIND-QkLPVRGAGQIGD.....AAPVGIALKK--GNSALKDQIDKALTEMRSDGTFEKISQKWFGQDVGQ...p
|
||||||
|
>B_subtilis_GlnH_homo_YCKK
|
||||||
|
MKKAll......ALF....MVVSIaA..LAACgagndnqskdnakdgdlwaSIKKKGVLTVGTEGTYEPFTYHDkdTDKLTGYDVEVITEVAKRLGLKVD
|
||||||
|
FKETQWGSMFAGLNSKRFDVVANQVGKTD-REDKYDFSDKYTTSRAVVVTKK.DNNDIKSEADVKGKTSAQSLTSNYNKLAT-N--A-GAKVEGVEGmaQ
|
||||||
|
ALQMIQQARVDMTYNDKLAVLN.YLKTSGnKNVKIAFETGE.....PQSTYFTFRK--GSGEVVDQVNKALKEMKEDGTLSKISKKWFGEDVSK....
|
||||||
|
>YA80_HAEIN
|
||||||
|
MKKLlf......TTA....LLTGAiA..FSTFs...........hageiadRVEKTKTLLVGTEGTYAPFTFHDk.SGKLTGFDVEVIRKVAEKLGLKVE
|
||||||
|
FKETQWDAMYAGLNAKRFDVIANQTNPSPERLKKYSFTTPYNYSGGVIVTKS.SDNSIKSFEDLKGRKSAQSATSNWGKDAK-A--A-GAQILVVDGlaQ
|
||||||
|
SLELIKQGRAEATINDKLAVLD.YFKQHPnSGLKIAYDRGD.....KTPTAFAFLQ--GEDALITKFNQVLEALRQDGTLKQISIEWFGYDITQ....
|
||||||
|
>E_coli_GlnH
|
||||||
|
MKSVl.......KVS....LAALTlA..FAVSsh.........a.......---ADKKLVVATDTAFVPFEFKQ..GDKYVGFDVDLWAAIAKELKLDYE
|
||||||
|
LKPMDFSGIIPALQTKNVDLALAGITITDERKKAIDFSDGYYKSGLLVMVKAn-NNDVKSVKDLDGKVVAVKSGTGSVDYAKAN--IKTKDLRQFPNidN
|
||||||
|
AYMELGTNRADAVLHDTPNILY.FIKTAGnGQFKAVGDSLE.....AQQYGIAFPK--GSDELRDKVNGALKTLRENGTYNEIYKKWFGTEP-K....
|
||||||
|
>HISJ_E_COLI
|
||||||
|
MKKLvl......SLS....LV---lA..FSSAta...............a.FAAIPQNIRIGTDPTYAPFESKNs.QGELVGFDIDLAKELCKRINTQCT
|
||||||
|
FVENPLDALIPSLKAKKIDAIMSSLSITEKRQQEIAFTDKLYAADSRLVVAK.NSDIQPTVESLKGKRVGVLQGTTQETFGNEHWAPKGIEIVSYQGqdN
|
||||||
|
IYSDLTAGRIDAAFQDEVAASEgFLKQPVgKDYKFGGPSVKdeklfGVGTGMGLRK--EDNELREALNKAFAEMRADGTYEKLAKKYFDFDVYG...g
|
24
Tests/Clustalw/muscle.a2m
Normal file
24
Tests/Clustalw/muscle.a2m
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
>Test1seq
|
||||||
|
.................................................................AGTTACAATAACTGACGAAGCTAAGTAGGCTACTA
|
||||||
|
ATTAACGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGTAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATC
|
||||||
|
GTATTCCGGTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAATAAATTAGCGCCAAAATAATGAAAAAAATAATAACAAACA
|
||||||
|
AAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGCTGAATTTTGATCACCTGCTA
|
||||||
|
ATACTACTTTCTGTATTGATCCTATATCAACGTAAACAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTTCGTTGACGAAGAAGAGCTC
|
||||||
|
TATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACGGTCGCTAGAGAAACTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCC
|
||||||
|
GGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCGTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTT.
|
||||||
|
>AT3G20900.1-SEQ
|
||||||
|
atgaacaaagtagcgaggaagaacaaaacatcaggtgaacaaaaaaaaaactcaatccacatcaaAGTTACAATAACTGACGAAGCTAAGTAGGCTAGAA
|
||||||
|
ATTAAAGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGCAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATA
|
||||||
|
GTTGATTTTTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAAAAAACAAGCGCCAAAATAATGAAAAAAATAATAACAAAAA
|
||||||
|
CAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGATGAATTTTGATCACCTGCTA
|
||||||
|
ATACTACTTTCTGTATTGATCCTATATCAAAAAAAAAAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTGCGTTGACGAAGAAGAGCTC
|
||||||
|
TATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCC
|
||||||
|
GGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAg
|
||||||
|
>AT3G20900.1-CDS
|
||||||
|
.................................................................-----------------------------------
|
||||||
|
----------------------------------------------------------------------------------------------------
|
||||||
|
------------------------------------------------------------------------------ATGAACAAAGTAGCGAGGAAGA
|
||||||
|
A------------------------------CAAAACATC------------------------------------------------------------
|
||||||
|
----------------------------------------------------------------------------------------------------
|
||||||
|
------------AGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCC
|
||||||
|
GGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAg
|
15
Tests/Clustalw/probcons.a2m
Normal file
15
Tests/Clustalw/probcons.a2m
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
>plas_horvu
|
||||||
|
D.VLLGANGGVLVFEPNDFSVKAGETITFKNNAGYPHNVVFDEDAVPSG.VD.VSKISQEEYLTAPGETFSVTLTV...PGTYGFYCEPHAGAGMVGKVT
|
||||||
|
V
|
||||||
|
>plas_chlre
|
||||||
|
-.VKLGADSGALEFVPKTLTIKSGETVNFVNNAGFPHNIVFDEDAIPSG.VN.ADAISRDDYLNAPGETYSVKLTA...AGEYGYYCEPHQGAGMVGKII
|
||||||
|
V
|
||||||
|
>plas_anava
|
||||||
|
-.VKLGSDKGLLVFEPAKLTIKPGDTVEFLNNKVPPHNVVFDAALNPAKsADlAKSLSHKQLLMSPGQSTSTTFPAdapAGEYTFYCEPHRGAGMVGKIT
|
||||||
|
V
|
||||||
|
>plas_proho
|
||||||
|
VqIKMGTDKYAPLYEPKALSISAGDTVEFVMNKVGPHNVIFDK--VPAG.ES.APALSNTKLRIAPGSFYSVTLGT...PGTYSFYCTPHRGAGMVGTIT
|
||||||
|
V
|
||||||
|
>azup_achcy
|
||||||
|
VhMLNKGKDGAMVFEPASLKVAPGDTVTFIPTDK-GHNVETIKGMIPDG.AE.A-------FKSKINENYKVTFTA...PGVYGVKCTPHYGMGMVGVVE
|
||||||
|
V
|
321
Tests/test_Align_a2m.py
Normal file
321
Tests/test_Align_a2m.py
Normal file
@ -0,0 +1,321 @@
|
|||||||
|
# Copyright 2006-2014 by Peter Cock. All rights reserved.
|
||||||
|
# Copyright 2022 by Michiel de Hoon. All rights reserved.
|
||||||
|
# This code is part of the Biopython distribution and governed by its
|
||||||
|
# license. Please see the LICENSE file that should have been included
|
||||||
|
# as part of this package.
|
||||||
|
"""Tests for Bio.Align.a2m module."""
|
||||||
|
import unittest
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
from io import StringIO
|
||||||
|
|
||||||
|
from Bio import BiopythonExperimentalWarning
|
||||||
|
|
||||||
|
with warnings.catch_warnings():
|
||||||
|
warnings.simplefilter("ignore", BiopythonExperimentalWarning)
|
||||||
|
from Bio.Align.a2m import AlignmentIterator
|
||||||
|
from Bio.Align.a2m import AlignmentWriter
|
||||||
|
|
||||||
|
|
||||||
|
class TestA2MReadingWriting(unittest.TestCase):
|
||||||
|
def check_reading_writing(self, path):
|
||||||
|
alignments = AlignmentIterator(path)
|
||||||
|
stream = StringIO()
|
||||||
|
writer = AlignmentWriter(stream)
|
||||||
|
n = writer.write_file(alignments, mincount=1, maxcount=1)
|
||||||
|
self.assertEqual(n, 1)
|
||||||
|
alignments = AlignmentIterator(path)
|
||||||
|
alignment = next(alignments)
|
||||||
|
stream.seek(0)
|
||||||
|
saved_alignments = AlignmentIterator(stream)
|
||||||
|
saved_alignment = next(saved_alignments)
|
||||||
|
with self.assertRaises(StopIteration):
|
||||||
|
next(saved_alignments)
|
||||||
|
self.assertEqual(len(alignment), len(saved_alignment))
|
||||||
|
for i, (sequence, saved_sequence) in enumerate(
|
||||||
|
zip(alignment.sequences, saved_alignment.sequences)
|
||||||
|
):
|
||||||
|
self.assertEqual(sequence.id, saved_sequence.id)
|
||||||
|
self.assertEqual(sequence.seq, saved_sequence.seq)
|
||||||
|
self.assertEqual(alignment[i], saved_alignment[i])
|
||||||
|
|
||||||
|
def test_clustalw(self):
|
||||||
|
path = "Clustalw/clustalw.a2m"
|
||||||
|
with open(path) as stream:
|
||||||
|
alignments = AlignmentIterator(stream)
|
||||||
|
alignment = next(alignments)
|
||||||
|
with self.assertRaises(StopIteration):
|
||||||
|
next(alignments)
|
||||||
|
self.assertEqual(
|
||||||
|
repr(alignment),
|
||||||
|
"<Bio.Align.Alignment object (2 rows x 601 columns) at 0x%x>"
|
||||||
|
% id(alignment),
|
||||||
|
)
|
||||||
|
self.assertEqual(len(alignment), 2)
|
||||||
|
self.assertEqual(alignment.sequences[0].id, "gi|4959044|gb|AAD34209.1|AF069")
|
||||||
|
self.assertEqual(alignment.sequences[1].id, "gi|671626|emb|CAA85685.1|")
|
||||||
|
self.assertEqual(
|
||||||
|
alignment.sequences[0].seq,
|
||||||
|
"MENSDSNDKGSDQSAAQRRSQMDRLDREEAFYQFVNNLSEEDYRLMRDNNLLGTPGESTEEELLRRLQQIKEGPPPQSPDENRAGESSDDVTNSDSIIDWLNSVRQTGNTTRSRQRGNQSWRAVSRTNPNSGDFRFSLEINVNRNNGSQTSENESEPSTRRLSVENMESSSQRQMENSASESASARPSRAERNSTEAVTEVPTTRAQRRARSRSPEHRRTRARAERSMSPLQPTSEIPRRAPTLEQSSENEPEGSSRTRHHVTLRQQISGPELLGRGLFAASGSRNPSQGTSSSDTGSNSESSGSGQRPPTIVLDLQVRRVRPGEYRQRDSIASRTRSRSQAPNNTVTYESERGGFRRTFSRSERAGVRTYVSTIRIPIRRILNTGLSETTSVAIQTMLRQIMTGFGELSYFMYSDSDSEPSASVSSRNVERVESRNGRGSSGGGNSSGSSSSSSPSPSSSGESSESSSKMFEGSSEGGSSGPSRKDGRHRAPVTFDESGSLPFFSLAQFFLLNEDDEDQPRGLTKEQIDNLAMRSFGENDALKTCSVCITEYTEGDKLRKLPCSHEFHVHCIDRWLSENSTCPICRRAVLSSGNRESVV",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment.sequences[1].seq,
|
||||||
|
"MSPQTETKASVGFKAGVKEYKLTYYTPEYETKDTDILAAFRVTPQPGVPPEEAGAAVAAESSTGTWTTVWTDGLTSLDRYKGRCYHIEPVPGEKDQCICYVAYPLDLFEEGSVTNMFTSIVGNVFGFKALRALRLEDLRIPVAYVKTFQGPPHGIQVERDKLNKYGRPLLGCTIKPKLGLSAKNYGRAVYECLRGGLDFTKDDENVNSQPFMRWRDRFLFCAEAIYKAQAETGEIKGHYLNATAGTCEEMIKRAIFARELGVPIVMHDYLTGGFTANTSLAHYCRDNGLLLHIHRAMHAVIDRQKNHGMHFRVLAKALRLSGGDHIHSGTVVGKLEGERDITLGFVDLLRDDFIEKDRSRGIYFTQDWVSLPGVIPVASGGIHVWHMPALTEIFGDDSVLQFGGGTLGHPWGNAPGAVANRVAVEACVKARNEGRDLAAEGNAIIREACKWSPELAAACEVWKEIKFEFPAMD",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment[0],
|
||||||
|
"MENSDSNDKGSDQSAAQRRSQMDRLDREEAFYQFVNNLSEEDYRLMRDNNLLGTPGESTEEELLRRLQQIKEGPPPQSPDENRAGESSDDVTNSDSIIDWLNSVRQTGNTTRSRQRGNQSWRAVSRTNPNSGDFRFSLEINVNRNNGSQTSENESEPSTRRLSVENMESSSQRQMENSASESASARPSRAERNSTEAVTEVPTTRAQRRARSRSPEHRRTRARAERSMSPLQPTSEIPRRAPTLEQSSENEPEGSSRTRHHVTLRQQISGPELLGRGLFAASGSRNPSQGTSSSDTGSNSESSGSGQRPPTIVLDLQVRRVRPGEYRQRDSIASRTRSRSQAPNNTVTYESERGGFRRTFSRSERAGVRTYVSTIRIPIRRILNTGLSETTSVAIQTMLRQIMTGFGELSYFMYSDSDSEPSASVSSRNVERVESRNGRGSSGGGNSSGSSSSSSPSPSSSGESSESSSKMFEGSSEGGSSGPSRKDGRHRAPVTFDESGSLPFFSLAQFFLLNEDDEDQPRGLTKEQIDNLAMRSFGENDALKTCSVCITEYTEGDKLRKLPCSHEFHVHCIDRWLSE-NSTCPICRRAVLSSGNRESVV",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment[1],
|
||||||
|
"---------MSPQTETKASVGFKAGVKEYKLTYYTPEYETKDTDILAAFRVTPQPG-----------------VPPEEAGAAVAAESSTGT---------WTTVWTDGLTSLDRYKG-----RCYHIEPVPG-------------------EKDQCICYVAYPLDLFEEGSVTNMFTSIVGNVFGFKALRALRLEDLRIPVAYVKTFQGPPHGIQVERDKLNKYGRPLLGCTIKPKLGLSAKNYGRAVYECLRGGLDFTKDDENVNSQPFMRWRDRFLFCAEAIYKAQAETGEIKGHYLNATAG-----------------------TCEEMIKRAIFARELGVPIVMHDYLTGGFTANTSLAHYCRDNGLLLHIHRAMHAVIDRQKNHGMHFRVLAKALRLSGGDHIHSGTVVGKLEGERDITLGFVDLLRDDFIEKDRSRGIYFTQDWVSLPGVIPVASG-----------------------------GIHVWHMPALTEIFGDDSVLQFGGGTLGHPWGNAPGAVANRVA-----------VEACVKARNEG---RDLAAEGNAIIREACKWSPELAAACEVWKEIKFEFPAMD---",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment.column_annotations["state"],
|
||||||
|

|
||||||
|
)
|
||||||
|
self.check_reading_writing(path)
|
||||||
|
|
||||||
|
def test_msaprobs(self):
|
||||||
|
path = "Clustalw/msaprobs.a2m"
|
||||||
|
# This example was obtained from
|
||||||
|
# http://virgil.ruc.dk/kurser/Sekvens/Treedraw.htm
|
||||||
|
# and converted to A2M format.
|
||||||
|
with open(path) as stream:
|
||||||
|
alignments = AlignmentIterator(stream)
|
||||||
|
alignment = next(alignments)
|
||||||
|
with self.assertRaises(StopIteration):
|
||||||
|
next(alignments)
|
||||||
|
self.assertEqual(
|
||||||
|
repr(alignment),
|
||||||
|
"<Bio.Align.Alignment object (8 rows x 298 columns) at 0x%x>"
|
||||||
|
% id(alignment),
|
||||||
|
)
|
||||||
|
self.assertEqual(len(alignment), 8)
|
||||||
|
self.assertEqual(alignment.shape, (8, 298))
|
||||||
|
self.assertEqual(alignment.sequences[0].id, "V_Harveyi_PATH")
|
||||||
|
self.assertEqual(alignment.sequences[1].id, "B_subtilis_YXEM")
|
||||||
|
self.assertEqual(alignment.sequences[2].id, "FLIY_ECOLI")
|
||||||
|
self.assertEqual(alignment.sequences[3].id, "Deinococcus_radiodurans")
|
||||||
|
self.assertEqual(alignment.sequences[4].id, "B_subtilis_GlnH_homo_YCKK")
|
||||||
|
self.assertEqual(alignment.sequences[5].id, "YA80_HAEIN")
|
||||||
|
self.assertEqual(alignment.sequences[6].id, "E_coli_GlnH")
|
||||||
|
self.assertEqual(alignment.sequences[7].id, "HISJ_E_COLI")
|
||||||
|
self.assertEqual(
|
||||||
|
alignment.sequences[0].seq,
|
||||||
|
"MKNWIKVAVAAIALSAATVQAATEVKVGMSGRYFPFTFVKQDKLQGFEVDMWDEIGKRNDYKIEYVTANFSGLFGLLETGRIDTISNQITMTDARKAKYLFADPYVVDGAQITVRKGNDSIQGVEDLAGKTVAVNLGSNFEQLLRDYDKDGKINIKTYDTGIEHDVALGRADAFIMDRLSALELIKKTGLPLQLAGEPFETIQNAWPFVDNEKGRKLQAEVNKALAEMRADGTVEKISVKWFGADITK",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment.sequences[1].seq,
|
||||||
|
"MKMKKWTVLVVAALLAVLSACGNGNSSSKEDDNVLHVGATGQSYPFAYKENGKLTGFDVEVMEAVAKKIDMKLDWKLLEFSGLMGELQTGKLDTISNQVAVTDERKETYNFTKPYAYAGTQIVVKKDNTDIKSVDDLKGKTVAAVLGSNHAKNLESKDPDKKINIKTYETQEGTLKDVAYGRVDAYVNSRTVLIAQIKKTGLPLKLAGDPIVYEQVAFPFAKDDAHDKLRKKVNKALDELRKDGTLKKLSEKYFNEDITVEQKH",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment.sequences[2].seq,
|
||||||
|
"MKLAHLGRQALMGVMAVALVAGMSVKSFADEGLLNKVKERGTLLVGLEGTYPPFSFQGDDGKLTGFEVEFAQQLAKHLGVEASLKPTKWDGMLASLDSKRIDVVINQVTISDERKKKYDFSTPYTISGIQALVKKGNEGTIKTADDLKGKKVGVGLGTNYEEWLRQNVQGVDVRTYDDDPTKYQDLRVGRIDAILVDRLAALDLVKKTNDTLAVTGEAFSRQESGVALRKGNEDLLKAVNDAIAEMQKDGTLQALSEKWFGADVTK",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment.sequences[3].seq,
|
||||||
|
"MKKSLLSLKLSGLLVPSVLALSLSACSSPSSTLNQGTLKIAMEGTYPPFTSKNEQGELVGFDVDIAKAVAQKLNLKPEFVLTEWSGILAGLQANKYDVIVNQVGITPERQNSIGFSQPYAYSRPEIIVAKNNTFNPQSLADLKGKRVGSTLGSNYEKQLIDTGDIKIVTYPGAPEILADLVAGRIDAAYNDRLVVNYIINDQKLPVRGAGQIGDAAPVGIALKKGNSALKDQIDKALTEMRSDGTFEKISQKWFGQDVGQP",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment.sequences[4].seq,
|
||||||
|
"MKKALLALFMVVSIAALAACGAGNDNQSKDNAKDGDLWASIKKKGVLTVGTEGTYEPFTYHDKDTDKLTGYDVEVITEVAKRLGLKVDFKETQWGSMFAGLNSKRFDVVANQVGKTDREDKYDFSDKYTTSRAVVVTKKDNNDIKSEADVKGKTSAQSLTSNYNKLATNAGAKVEGVEGMAQALQMIQQARVDMTYNDKLAVLNYLKTSGNKNVKIAFETGEPQSTYFTFRKGSGEVVDQVNKALKEMKEDGTLSKISKKWFGEDVSK",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment.sequences[5].seq,
|
||||||
|
"MKKLLFTTALLTGAIAFSTFSHAGEIADRVEKTKTLLVGTEGTYAPFTFHDKSGKLTGFDVEVIRKVAEKLGLKVEFKETQWDAMYAGLNAKRFDVIANQTNPSPERLKKYSFTTPYNYSGGVIVTKSSDNSIKSFEDLKGRKSAQSATSNWGKDAKAAGAQILVVDGLAQSLELIKQGRAEATINDKLAVLDYFKQHPNSGLKIAYDRGDKTPTAFAFLQGEDALITKFNQVLEALRQDGTLKQISIEWFGYDITQ",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment.sequences[6].seq,
|
||||||
|
"MKSVLKVSLAALTLAFAVSSHAADKKLVVATDTAFVPFEFKQGDKYVGFDVDLWAAIAKELKLDYELKPMDFSGIIPALQTKNVDLALAGITITDERKKAIDFSDGYYKSGLLVMVKANNNDVKSVKDLDGKVVAVKSGTGSVDYAKANIKTKDLRQFPNIDNAYMELGTNRADAVLHDTPNILYFIKTAGNGQFKAVGDSLEAQQYGIAFPKGSDELRDKVNGALKTLRENGTYNEIYKKWFGTEPK",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment.sequences[7].seq,
|
||||||
|
"MKKLVLSLSLVLAFSSATAAFAAIPQNIRIGTDPTYAPFESKNSQGELVGFDIDLAKELCKRINTQCTFVENPLDALIPSLKAKKIDAIMSSLSITEKRQQEIAFTDKLYAADSRLVVAKNSDIQPTVESLKGKRVGVLQGTTQETFGNEHWAPKGIEIVSYQGQDNIYSDLTAGRIDAAFQDEVAASEGFLKQPVGKDYKFGGPSVKDEKLFGVGTGMGLRKEDNELREALNKAFAEMRADGTYEKLAKKYFDFDVYGG",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment[0],
|
||||||
|
"MKNW--------IKV----AVAAI-A--LSAA-------------------TVQAATEVKVGMSGRYFPFTFVK--QDKLQGFEVDMWDEIGKRNDYKIEYVTANFSGLFGLLETGRIDTISNQITMTDARKAKYLFADPYVVDGAQITVRK-GNDSIQGVEDLAGKTVAVNLGSNFEQLLRDYDKDGKINIKTYDT--GIEHDVALGRADAFIMDRLSALE-LIKKTG-LPLQLAGEPFE-----TIQNAWPFVDNEKGRKLQAEVNKALAEMRADGTVEKISVKWFGADITK----",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment[1],
|
||||||
|
"MKMKKW------TVL----VVAALLA-VLSACGN------------G-NSSSKEDDNVLHVGATGQSYPFAYKE--NGKLTGFDVEVMEAVAKKIDMKLDWKLLEFSGLMGELQTGKLDTISNQVAVTDERKETYNFTKPYAYAGTQIVVKK-DNTDIKSVDDLKGKTVAAVLGSNHAKNLESKDPDKKINIKTYETQEGTLKDVAYGRVDAYVNSRTVLIA-QIKKTG-LPLKLAGDPIV-----YEQVAFPFAKDDAHDKLRKKVNKALDELRKDGTLKKLSEKYFNEDITVEQKH",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment[2],
|
||||||
|
"MKLAHLGRQALMGVM----AVALVAG--MSVKSF---------ADEG-LLNKVKERGTLLVGLEGTYPPFSFQGD-DGKLTGFEVEFAQQLAKHLGVEASLKPTKWDGMLASLDSKRIDVVINQVTISDERKKKYDFSTPYTISGIQALVKKGNEGTIKTADDLKGKKVGVGLGTNYEEWLRQN--VQGVDVRTYDDDPTKYQDLRVGRIDAILVDRLAALD-LVKKTN-DTLAVTGEAFS-----RQESGVALRK--GNEDLLKAVNDAIAEMQKDGTLQALSEKWFGADVTK----",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment[3],
|
||||||
|
"MKKSLL------SLKLSGLLVPSVLALSLSACSS---------------PSSTLNQGTLKIAMEGTYPPFTSKNE-QGELVGFDVDIAKAVAQKLNLKPEFVLTEWSGILAGLQANKYDVIVNQVGITPERQNSIGFSQPYAYSRPEIIVAKNNTFNPQSLADLKGKRVGSTLGSNYEKQLI-D--TGDIKIVTYPGAPEILADLVAGRIDAAYNDRLVVNY-IIND-QKLPVRGAGQIGD-----AAPVGIALKK--GNSALKDQIDKALTEMRSDGTFEKISQKWFGQDVGQ---P",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment[4],
|
||||||
|
"MKKALL------ALF----MVVSIAA--LAACGAGNDNQSKDNAKDGDLWASIKKKGVLTVGTEGTYEPFTYHDKDTDKLTGYDVEVITEVAKRLGLKVDFKETQWGSMFAGLNSKRFDVVANQVGKTD-REDKYDFSDKYTTSRAVVVTKK-DNNDIKSEADVKGKTSAQSLTSNYNKLAT-N--A-GAKVEGVEGMAQALQMIQQARVDMTYNDKLAVLN-YLKTSGNKNVKIAFETGE-----PQSTYFTFRK--GSGEVVDQVNKALKEMKEDGTLSKISKKWFGEDVSK----",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment[5],
|
||||||
|
"MKKLLF------TTA----LLTGAIA--FSTFS-----------HAGEIADRVEKTKTLLVGTEGTYAPFTFHDK-SGKLTGFDVEVIRKVAEKLGLKVEFKETQWDAMYAGLNAKRFDVIANQTNPSPERLKKYSFTTPYNYSGGVIVTKS-SDNSIKSFEDLKGRKSAQSATSNWGKDAK-A--A-GAQILVVDGLAQSLELIKQGRAEATINDKLAVLD-YFKQHPNSGLKIAYDRGD-----KTPTAFAFLQ--GEDALITKFNQVLEALRQDGTLKQISIEWFGYDITQ----",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment[6],
|
||||||
|
"MKSVL-------KVS----LAALTLA--FAVSSH---------A----------ADKKLVVATDTAFVPFEFKQ--GDKYVGFDVDLWAAIAKELKLDYELKPMDFSGIIPALQTKNVDLALAGITITDERKKAIDFSDGYYKSGLLVMVKAN-NNDVKSVKDLDGKVVAVKSGTGSVDYAKAN--IKTKDLRQFPNIDNAYMELGTNRADAVLHDTPNILY-FIKTAGNGQFKAVGDSLE-----AQQYGIAFPK--GSDELRDKVNGALKTLRENGTYNEIYKKWFGTEP-K----",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment[7],
|
||||||
|
"MKKLVL------SLS----LV---LA--FSSATA---------------A-FAAIPQNIRIGTDPTYAPFESKNS-QGELVGFDIDLAKELCKRINTQCTFVENPLDALIPSLKAKKIDAIMSSLSITEKRQQEIAFTDKLYAADSRLVVAK-NSDIQPTVESLKGKRVGVLQGTTQETFGNEHWAPKGIEIVSYQGQDNIYSDLTAGRIDAAFQDEVAASEGFLKQPVGKDYKFGGPSVKDEKLFGVGTGMGLRK--EDNELREALNKAFAEMRADGTYEKLAKKYFDFDVYG---G",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment.column_annotations["state"],
|
||||||
|
"DDDDIIIIIIIIDDDIIIIDDDDDIDIIDDDDIIIIIIIIIIIIIIIIIIIDDDDDDDDDDDDDDDDDDDDDDDIIDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDIDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDIIDDDDDDDDDDDDDDDDDDDDDDDIDDDDDDIDDDDDDDDDDDIIIIIDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDIIII",
|
||||||
|
)
|
||||||
|
self.check_reading_writing(path)
|
||||||
|
|
||||||
|
def test_muscle(self):
|
||||||
|
path = "Clustalw/muscle.a2m"
|
||||||
|
with open(path) as stream:
|
||||||
|
alignments = AlignmentIterator(stream)
|
||||||
|
alignment = next(alignments)
|
||||||
|
with self.assertRaises(StopIteration):
|
||||||
|
next(alignments)
|
||||||
|
self.assertEqual(
|
||||||
|
repr(alignment),
|
||||||
|
"<Bio.Align.Alignment object (3 rows x 687 columns) at 0x%x>"
|
||||||
|
% id(alignment),
|
||||||
|
)
|
||||||
|
self.assertEqual(len(alignment), 3)
|
||||||
|
self.assertEqual(alignment.sequences[0].id, "Test1seq")
|
||||||
|
self.assertEqual(alignment.sequences[1].id, "AT3G20900.1-SEQ")
|
||||||
|
self.assertEqual(alignment.sequences[2].id, "AT3G20900.1-CDS")
|
||||||
|
self.assertEqual(
|
||||||
|
alignment.sequences[0].seq,
|
||||||
|
"AGTTACAATAACTGACGAAGCTAAGTAGGCTACTAATTAACGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGTAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATCGTATTCCGGTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAATAAATTAGCGCCAAAATAATGAAAAAAATAATAACAAACAAAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGCTGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGATCCTATATCAACGTAAACAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTTCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACGGTCGCTAGAGAAACTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCGTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTT",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment.sequences[1].seq,
|
||||||
|
"ATGAACAAAGTAGCGAGGAAGAACAAAACATCAGGTGAACAAAAAAAAAACTCAATCCACATCAAAGTTACAATAACTGACGAAGCTAAGTAGGCTAGAAATTAAAGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGCAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATAGTTGATTTTTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAAAAAACAAGCGCCAAAATAATGAAAAAAATAATAACAAAAACAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGATGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGATCCTATATCAAAAAAAAAAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTGCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAG",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment.sequences[2].seq,
|
||||||
|
"ATGAACAAAGTAGCGAGGAAGAACAAAACATCAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAG",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment[0],
|
||||||
|
"-----------------------------------------------------------------AGTTACAATAACTGACGAAGCTAAGTAGGCTACTAATTAACGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGTAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATCGTATTCCGGTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAATAAATTAGCGCCAAAATAATGAAAAAAATAATAACAAACAAAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGCTGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGATCCTATATCAACGTAAACAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTTCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACGGTCGCTAGAGAAACTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCGTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTT-",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment[1],
|
||||||
|
"ATGAACAAAGTAGCGAGGAAGAACAAAACATCAGGTGAACAAAAAAAAAACTCAATCCACATCAAAGTTACAATAACTGACGAAGCTAAGTAGGCTAGAAATTAAAGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGCAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATAGTTGATTTTTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAAAAAACAAGCGCCAAAATAATGAAAAAAATAATAACAAAAACAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGATGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGATCCTATATCAAAAAAAAAAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTGCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAG",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment[2],
|
||||||
|
"--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ATGAACAAAGTAGCGAGGAAGAA------------------------------CAAAACATC----------------------------------------------------------------------------------------------------------------------------------------------------------------------------AGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAG",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment.column_annotations["state"],
|
||||||
|

|
||||||
|
)
|
||||||
|
self.check_reading_writing(path)
|
||||||
|
|
||||||
|
def test_kalign(self):
    """Parse Clustalw/kalign.a2m and verify the single alignment it contains."""
    path = "Clustalw/kalign.a2m"
    with open(path) as stream:
        alignments = AlignmentIterator(stream)
        alignment = next(alignments)
        # Only one alignment is expected, so a second next() must stop.
        self.assertRaises(StopIteration, next, alignments)
    expected_repr = (
        "<Bio.Align.Alignment object (2 rows x 27 columns) at 0x%x>"
        % id(alignment)
    )
    self.assertEqual(repr(alignment), expected_repr)
    self.assertEqual(len(alignment), 2)
    # Expected values, listed in row order.
    expected_ids = ("Test1seq", "AT3G20900")
    expected_seqs = (
        "GCTGGGGATGGAGAGGGAACAGAGTT",
        "GCTGGGGATGGAGAGGGAACAGAGTAG",
    )
    expected_rows = (
        "GCTGGGGATGGAGAGGGAACAGAGT-T",
        "GCTGGGGATGGAGAGGGAACAGAGTAG",
    )
    for index, expected_id in enumerate(expected_ids):
        self.assertEqual(alignment.sequences[index].id, expected_id)
    for index, expected_seq in enumerate(expected_seqs):
        self.assertEqual(alignment.sequences[index].seq, expected_seq)
    for index, expected_row in enumerate(expected_rows):
        self.assertEqual(alignment[index], expected_row)
    self.assertEqual(
        alignment.column_annotations["state"],
        "DDDDDDDDDDDDDDDDDDDDDDDDDID",
    )
    # NOTE(review): check_reading_writing is defined elsewhere on the test
    # class; presumably a write/re-read round-trip check -- confirm.
    self.check_reading_writing(path)
|
||||||
|
|
||||||
|
def test_probcons(self):
    """Parse Clustalw/probcons.a2m and verify the single alignment it contains."""
    # The example comes from the PROBCONS documentation and was converted
    # to aligned A2M format.
    path = "Clustalw/probcons.a2m"
    with open(path) as stream:
        alignments = AlignmentIterator(stream)
        alignment = next(alignments)
        # Only one alignment is expected, so a second next() must stop.
        self.assertRaises(StopIteration, next, alignments)
    expected_repr = (
        "<Bio.Align.Alignment object (5 rows x 101 columns) at 0x%x>"
        % id(alignment)
    )
    self.assertEqual(repr(alignment), expected_repr)
    self.assertEqual(len(alignment), 5)
    # Expected values, listed in row order.
    expected_ids = (
        "plas_horvu",
        "plas_chlre",
        "plas_anava",
        "plas_proho",
        "azup_achcy",
    )
    expected_seqs = (
        "DVLLGANGGVLVFEPNDFSVKAGETITFKNNAGYPHNVVFDEDAVPSGVDVSKISQEEYLTAPGETFSVTLTVPGTYGFYCEPHAGAGMVGKVTV",
        "VKLGADSGALEFVPKTLTIKSGETVNFVNNAGFPHNIVFDEDAIPSGVNADAISRDDYLNAPGETYSVKLTAAGEYGYYCEPHQGAGMVGKIIV",
        "VKLGSDKGLLVFEPAKLTIKPGDTVEFLNNKVPPHNVVFDAALNPAKSADLAKSLSHKQLLMSPGQSTSTTFPADAPAGEYTFYCEPHRGAGMVGKITV",
        "VQIKMGTDKYAPLYEPKALSISAGDTVEFVMNKVGPHNVIFDKVPAGESAPALSNTKLRIAPGSFYSVTLGTPGTYSFYCTPHRGAGMVGTITV",
        "VHMLNKGKDGAMVFEPASLKVAPGDTVTFIPTDKGHNVETIKGMIPDGAEAFKSKINENYKVTFTAPGVYGVKCTPHYGMGMVGVVEV",
    )
    expected_rows = (
        "D-VLLGANGGVLVFEPNDFSVKAGETITFKNNAGYPHNVVFDEDAVPSG-VD-VSKISQEEYLTAPGETFSVTLTV---PGTYGFYCEPHAGAGMVGKVTV",
        "--VKLGADSGALEFVPKTLTIKSGETVNFVNNAGFPHNIVFDEDAIPSG-VN-ADAISRDDYLNAPGETYSVKLTA---AGEYGYYCEPHQGAGMVGKIIV",
        "--VKLGSDKGLLVFEPAKLTIKPGDTVEFLNNKVPPHNVVFDAALNPAKSADLAKSLSHKQLLMSPGQSTSTTFPADAPAGEYTFYCEPHRGAGMVGKITV",
        "VQIKMGTDKYAPLYEPKALSISAGDTVEFVMNKVGPHNVIFDK--VPAG-ES-APALSNTKLRIAPGSFYSVTLGT---PGTYSFYCTPHRGAGMVGTITV",
        "VHMLNKGKDGAMVFEPASLKVAPGDTVTFIPTDK-GHNVETIKGMIPDG-AE-A-------FKSKINENYKVTFTA---PGVYGVKCTPHYGMGMVGVVEV",
    )
    for index, expected_id in enumerate(expected_ids):
        self.assertEqual(alignment.sequences[index].id, expected_id)
    for index, expected_seq in enumerate(expected_seqs):
        self.assertEqual(alignment.sequences[index].seq, expected_seq)
    for index, expected_row in enumerate(expected_rows):
        self.assertEqual(alignment[index], expected_row)
    self.assertEqual(
        alignment.column_annotations["state"],
        "DIDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDIDDIDDDDDDDDDDDDDDDDDDDDDDDIIIDDDDDDDDDDDDDDDDDDDDDD",
    )
    # NOTE(review): check_reading_writing is defined elsewhere on the test
    # class; presumably a write/re-read round-trip check -- confirm.
    self.check_reading_writing(path)
|
||||||
|
|
||||||
|
def test_empty(self):
    """Check that reading from an empty stream raises ValueError."""
    alignments = AlignmentIterator(StringIO())
    # The error is expected when the first alignment is requested.
    self.assertRaises(ValueError, next, alignments)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Run this module's tests with verbose output when executed as a script.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))
|
Reference in New Issue
Block a user