mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 13:43:47 +08:00
Parser for aligned FASTA (#3962)
* adding parser for aligned FASTA * update * update * rename test function * update
This commit is contained in:
99
Bio/Align/fasta.py
Normal file
99
Bio/Align/fasta.py
Normal file
@ -0,0 +1,99 @@
|
||||
# Copyright 2022 by Michiel de Hoon. All rights reserved.
|
||||
#
|
||||
# This file is part of the Biopython distribution and governed by your
|
||||
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
|
||||
# Please see the LICENSE file that should have been included as part of this
|
||||
# package.
|
||||
"""Bio.Align support for aligned FASTA files.
|
||||
|
||||
Aligned FASTA files are FASTA files in which alignment gaps in a sequence are
|
||||
represented by dashes. A sequence may span several lines; after joining its
lines, each sequence (including its gap characters) should have the same length.
|
||||
"""
|
||||
from Bio.Align import Alignment
|
||||
from Bio.Align import interfaces
|
||||
from Bio.Seq import Seq
|
||||
from Bio.SeqRecord import SeqRecord
|
||||
from Bio import BiopythonExperimentalWarning
|
||||
|
||||
import warnings
|
||||
|
||||
# Emitted once, when the module is first imported, to flag the API as unstable.
warnings.warn(
    "Bio.Align.fasta is an experimental module which may undergo "
    "significant changes prior to its future official release.",
    category=BiopythonExperimentalWarning,
)
|
||||
|
||||
|
||||
class AlignmentWriter(interfaces.AlignmentWriter):
    """Alignment file writer for the aligned FASTA file format."""

    def __init__(self, target):
        """Create an AlignmentWriter object.

        Arguments:
         - target - output stream or file name

        """
        super().__init__(target, mode="w")

    def format_alignment(self, alignment):
        """Return a string with the alignment in aligned FASTA format.

        Each row of the alignment is written as a header line (">" followed
        by the sequence id and, if it is not empty, a space and the
        description), followed by one line holding the gapped sequence.

        Arguments:
         - alignment - the Alignment object to format

        Returns the formatted text, terminated by a newline.

        Raises a TypeError if alignment is not an Alignment object.
        """
        if not isinstance(alignment, Alignment):
            raise TypeError("Expected an Alignment object")
        lines = []
        for sequence, line in zip(alignment.sequences, alignment):
            # The sequences of an alignment are not necessarily SeqRecord
            # objects; fall back to an empty name/description if the
            # attributes are missing instead of raising AttributeError.
            name = getattr(sequence, "id", "")
            description = getattr(sequence, "description", "")
            if description:
                lines.append(f">{name} {description}")
            else:
                # Avoid a trailing space (or the text "None") in the header.
                lines.append(f">{name}")
            lines.append(line)
        # Terminate the last sequence line so that the output ends with a
        # newline and a subsequently written header cannot be glued to it.
        # NOTE(review): assumes the base class writes this text verbatim
        # without appending a line terminator itself - confirm.
        return "\n".join(lines) + "\n"
|
||||
|
||||
|
||||
class AlignmentIterator(interfaces.AlignmentIterator):
    """Alignment iterator for aligned FASTA files.

    An aligned FASTA file contains one multiple alignment. Alignment gaps are
    represented by dashes in the sequence lines. Header lines start with '>'
    followed by the name of the sequence, and optionally a description.
    """

    def __init__(self, source):
        """Create an AlignmentIterator object.

        Arguments:
         - source - input data or file name

        """
        super().__init__(source, mode="t", fmt="FASTA")

    def parse(self, stream):
        """Parse the next alignment from the stream.

        This is a generator yielding a single Alignment object built from
        all records found in the stream. The sequence lines of each record
        are stripped of surrounding whitespace and joined; dashes are taken
        as gaps and removed from the sequences stored in the records.

        Raises a ValueError if the stream contains no records, if sequence
        data appear before the first header line, or if the gapped sequences
        do not all have the same length.
        """
        names = []
        descriptions = []
        rows = []  # one list of sequence-line fragments per record
        for line in stream:
            if line.startswith(">"):
                parts = line[1:].rstrip().split(None, 1)
                if len(parts) == 2:
                    name, description = parts
                elif parts:
                    name = parts[0]
                    description = None
                else:
                    # A bare ">" header carries neither name nor description.
                    name = ""
                    description = None
                names.append(name)
                descriptions.append(description)
                rows.append([])
            else:
                fragment = line.strip()
                if not fragment:
                    # Blank lines carry no sequence data.
                    continue
                if not rows:
                    raise ValueError(
                        "Expected a header line starting with '>' "
                        "before the first sequence line."
                    )
                rows[-1].append(fragment)
        if not rows:
            raise ValueError("Empty file.")
        # Join once per record rather than growing a string line by line.
        lines = ["".join(fragments) for fragments in rows]
        length = len(lines[0])
        for name, line in zip(names, lines):
            if len(line) != length:
                raise ValueError(
                    f"Aligned sequence '{name}' has length {len(line)}, "
                    f"while the first aligned sequence has length {length}."
                )
        coordinates = Alignment.infer_coordinates(lines)
        records = []
        for name, description, line in zip(names, descriptions, lines):
            line = line.replace("-", "")
            sequence = Seq(line)
            if description is None:
                # Leave the description at the SeqRecord default.
                record = SeqRecord(sequence, name)
            else:
                record = SeqRecord(sequence, name, description=description)
            records.append(record)
        alignment = Alignment(records, coordinates)
        yield alignment
|
16
Tests/Clustalw/clustalw.fa
Normal file
16
Tests/Clustalw/clustalw.fa
Normal file
@ -0,0 +1,16 @@
|
||||
>gi|4959044|gb|AAD34209.1|AF069
|
||||
MENSDSNDKGSDQSAAQRRSQMDRLDREEAFYQFVNNLSEEDYRLMRDNNLLGTPGESTEEELLRRLQQIKEGPPPQSPDENRAGESSDDVTNSDSIIDW
|
||||
LNSVRQTGNTTRSRQRGNQSWRAVSRTNPNSGDFRFSLEINVNRNNGSQTSENESEPSTRRLSVENMESSSQRQMENSASESASARPSRAERNSTEAVTE
|
||||
VPTTRAQRRARSRSPEHRRTRARAERSMSPLQPTSEIPRRAPTLEQSSENEPEGSSRTRHHVTLRQQISGPELLGRGLFAASGSRNPSQGTSSSDTGSNS
|
||||
ESSGSGQRPPTIVLDLQVRRVRPGEYRQRDSIASRTRSRSQAPNNTVTYESERGGFRRTFSRSERAGVRTYVSTIRIPIRRILNTGLSETTSVAIQTMLR
|
||||
QIMTGFGELSYFMYSDSDSEPSASVSSRNVERVESRNGRGSSGGGNSSGSSSSSSPSPSSSGESSESSSKMFEGSSEGGSSGPSRKDGRHRAPVTFDESG
|
||||
SLPFFSLAQFFLLNEDDEDQPRGLTKEQIDNLAMRSFGENDALKTCSVCITEYTEGDKLRKLPCSHEFHVHCIDRWLSE-NSTCPICRRAVLSSGNRESV
|
||||
V
|
||||
>gi|671626|emb|CAA85685.1|
|
||||
---------MSPQTETKASVGFKAGVKEYKLTYYTPEYETKDTDILAAFRVTPQPG-----------------VPPEEAGAAVAAESSTGT---------
|
||||
WTTVWTDGLTSLDRYKG-----RCYHIEPVPG-------------------EKDQCICYVAYPLDLFEEGSVTNMFTSIVGNVFGFKALRALRLEDLRIP
|
||||
VAYVKTFQGPPHGIQVERDKLNKYGRPLLGCTIKPKLGLSAKNYGRAVYECLRGGLDFTKDDENVNSQPFMRWRDRFLFCAEAIYKAQAETGEIKGHYLN
|
||||
ATAG-----------------------TCEEMIKRAIFARELGVPIVMHDYLTGGFTANTSLAHYCRDNGLLLHIHRAMHAVIDRQKNHGMHFRVLAKAL
|
||||
RLSGGDHIHSGTVVGKLEGERDITLGFVDLLRDDFIEKDRSRGIYFTQDWVSLPGVIPVASG-----------------------------GIHVWHMPA
|
||||
LTEIFGDDSVLQFGGGTLGHPWGNAPGAVANRVA-----------VEACVKARNEG---RDLAAEGNAIIREACKWSPELAAACEVWKEIKFEFPAMD--
|
||||
-
|
4
Tests/Clustalw/kalign.fa
Normal file
4
Tests/Clustalw/kalign.fa
Normal file
@ -0,0 +1,4 @@
|
||||
>Test1seq
|
||||
GCTGGGGATGGAGAGGGAACAGAGT-T
|
||||
>AT3G20900
|
||||
GCTGGGGATGGAGAGGGAACAGAGTAG
|
32
Tests/Clustalw/msaprobs.fa
Normal file
32
Tests/Clustalw/msaprobs.fa
Normal file
@ -0,0 +1,32 @@
|
||||
>V_Harveyi_PATH
|
||||
MKNW--------IKV----AVAAI-A--LSAA-------------------TVQAATEVKVGMSGRYFPFTFVK--QDKLQGFEVDMWDEIGKRNDYKIE
|
||||
YVTANFSGLFGLLETGRIDTISNQITMTDARKAKYLFADPYVVDGAQITVRK-GNDSIQGVEDLAGKTVAVNLGSNFEQLLRDYDKDGKINIKTYDT--G
|
||||
IEHDVALGRADAFIMDRLSALE-LIKKTG-LPLQLAGEPFE-----TIQNAWPFVDNEKGRKLQAEVNKALAEMRADGTVEKISVKWFGADITK----
|
||||
>B_subtilis_YXEM
|
||||
MKMKKW------TVL----VVAALLA-VLSACGN------------G-NSSSKEDDNVLHVGATGQSYPFAYKE--NGKLTGFDVEVMEAVAKKIDMKLD
|
||||
WKLLEFSGLMGELQTGKLDTISNQVAVTDERKETYNFTKPYAYAGTQIVVKK-DNTDIKSVDDLKGKTVAAVLGSNHAKNLESKDPDKKINIKTYETQEG
|
||||
TLKDVAYGRVDAYVNSRTVLIA-QIKKTG-LPLKLAGDPIV-----YEQVAFPFAKDDAHDKLRKKVNKALDELRKDGTLKKLSEKYFNEDITVEQKH
|
||||
>FLIY_ECOLI
|
||||
MKLAHLGRQALMGVM----AVALVAG--MSVKSF---------ADEG-LLNKVKERGTLLVGLEGTYPPFSFQGD-DGKLTGFEVEFAQQLAKHLGVEAS
|
||||
LKPTKWDGMLASLDSKRIDVVINQVTISDERKKKYDFSTPYTISGIQALVKKGNEGTIKTADDLKGKKVGVGLGTNYEEWLRQN--VQGVDVRTYDDDPT
|
||||
KYQDLRVGRIDAILVDRLAALD-LVKKTN-DTLAVTGEAFS-----RQESGVALRK--GNEDLLKAVNDAIAEMQKDGTLQALSEKWFGADVTK----
|
||||
>Deinococcus_radiodurans
|
||||
MKKSLL------SLKLSGLLVPSVLALSLSACSS---------------PSSTLNQGTLKIAMEGTYPPFTSKNE-QGELVGFDVDIAKAVAQKLNLKPE
|
||||
FVLTEWSGILAGLQANKYDVIVNQVGITPERQNSIGFSQPYAYSRPEIIVAKNNTFNPQSLADLKGKRVGSTLGSNYEKQLI-D--TGDIKIVTYPGAPE
|
||||
ILADLVAGRIDAAYNDRLVVNY-IIND-QKLPVRGAGQIGD-----AAPVGIALKK--GNSALKDQIDKALTEMRSDGTFEKISQKWFGQDVGQ---P
|
||||
>B_subtilis_GlnH_homo_YCKK
|
||||
MKKALL------ALF----MVVSIAA--LAACGAGNDNQSKDNAKDGDLWASIKKKGVLTVGTEGTYEPFTYHDKDTDKLTGYDVEVITEVAKRLGLKVD
|
||||
FKETQWGSMFAGLNSKRFDVVANQVGKTD-REDKYDFSDKYTTSRAVVVTKK-DNNDIKSEADVKGKTSAQSLTSNYNKLAT-N--A-GAKVEGVEGMAQ
|
||||
ALQMIQQARVDMTYNDKLAVLN-YLKTSGNKNVKIAFETGE-----PQSTYFTFRK--GSGEVVDQVNKALKEMKEDGTLSKISKKWFGEDVSK----
|
||||
>YA80_HAEIN
|
||||
MKKLLF------TTA----LLTGAIA--FSTFS-----------HAGEIADRVEKTKTLLVGTEGTYAPFTFHDK-SGKLTGFDVEVIRKVAEKLGLKVE
|
||||
FKETQWDAMYAGLNAKRFDVIANQTNPSPERLKKYSFTTPYNYSGGVIVTKS-SDNSIKSFEDLKGRKSAQSATSNWGKDAK-A--A-GAQILVVDGLAQ
|
||||
SLELIKQGRAEATINDKLAVLD-YFKQHPNSGLKIAYDRGD-----KTPTAFAFLQ--GEDALITKFNQVLEALRQDGTLKQISIEWFGYDITQ----
|
||||
>E_coli_GlnH
|
||||
MKSVL-------KVS----LAALTLA--FAVSSH---------A----------ADKKLVVATDTAFVPFEFKQ--GDKYVGFDVDLWAAIAKELKLDYE
|
||||
LKPMDFSGIIPALQTKNVDLALAGITITDERKKAIDFSDGYYKSGLLVMVKAN-NNDVKSVKDLDGKVVAVKSGTGSVDYAKAN--IKTKDLRQFPNIDN
|
||||
AYMELGTNRADAVLHDTPNILY-FIKTAGNGQFKAVGDSLE-----AQQYGIAFPK--GSDELRDKVNGALKTLRENGTYNEIYKKWFGTEP-K----
|
||||
>HISJ_E_COLI
|
||||
MKKLVL------SLS----LV---LA--FSSATA---------------A-FAAIPQNIRIGTDPTYAPFESKNS-QGELVGFDIDLAKELCKRINTQCT
|
||||
FVENPLDALIPSLKAKKIDAIMSSLSITEKRQQEIAFTDKLYAADSRLVVAK-NSDIQPTVESLKGKRVGVLQGTTQETFGNEHWAPKGIEIVSYQGQDN
|
||||
IYSDLTAGRIDAAFQDEVAASEGFLKQPVGKDYKFGGPSVKDEKLFGVGTGMGLRK--EDNELREALNKAFAEMRADGTYEKLAKKYFDFDVYG---G
|
24
Tests/Clustalw/muscle.fa
Normal file
24
Tests/Clustalw/muscle.fa
Normal file
@ -0,0 +1,24 @@
|
||||
>Test1seq
|
||||
-----------------------------------------------------------------AGTTACAATAACTGACGAAGCTAAGTAGGCTACTA
|
||||
ATTAACGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGTAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATC
|
||||
GTATTCCGGTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAATAAATTAGCGCCAAAATAATGAAAAAAATAATAACAAACA
|
||||
AAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGCTGAATTTTGATCACCTGCTA
|
||||
ATACTACTTTCTGTATTGATCCTATATCAACGTAAACAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTTCGTTGACGAAGAAGAGCTC
|
||||
TATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACGGTCGCTAGAGAAACTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCC
|
||||
GGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCGTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTT-
|
||||
>AT3G20900.1-SEQ
|
||||
ATGAACAAAGTAGCGAGGAAGAACAAAACATCAGGTGAACAAAAAAAAAACTCAATCCACATCAAAGTTACAATAACTGACGAAGCTAAGTAGGCTAGAA
|
||||
ATTAAAGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGCAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATA
|
||||
GTTGATTTTTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAAAAAACAAGCGCCAAAATAATGAAAAAAATAATAACAAAAA
|
||||
CAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGATGAATTTTGATCACCTGCTA
|
||||
ATACTACTTTCTGTATTGATCCTATATCAAAAAAAAAAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTGCGTTGACGAAGAAGAGCTC
|
||||
TATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCC
|
||||
GGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAG
|
||||
>AT3G20900.1-CDS
|
||||
----------------------------------------------------------------------------------------------------
|
||||
----------------------------------------------------------------------------------------------------
|
||||
------------------------------------------------------------------------------ATGAACAAAGTAGCGAGGAAGA
|
||||
A------------------------------CAAAACATC------------------------------------------------------------
|
||||
----------------------------------------------------------------------------------------------------
|
||||
------------AGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCC
|
||||
GGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAG
|
10
Tests/Clustalw/probcons.fa
Normal file
10
Tests/Clustalw/probcons.fa
Normal file
@ -0,0 +1,10 @@
|
||||
>plas_horvu
|
||||
D-VLLGANGGVLVFEPNDFSVKAGETITFKNNAGYPHNVVFDEDAVPSG-VD-VSKISQEEYLTAPGETFSVTLTV---PGTYGFYCEPHAGAGMVGKVTV
|
||||
>plas_chlre
|
||||
--VKLGADSGALEFVPKTLTIKSGETVNFVNNAGFPHNIVFDEDAIPSG-VN-ADAISRDDYLNAPGETYSVKLTA---AGEYGYYCEPHQGAGMVGKIIV
|
||||
>plas_anava
|
||||
--VKLGSDKGLLVFEPAKLTIKPGDTVEFLNNKVPPHNVVFDAALNPAKSADLAKSLSHKQLLMSPGQSTSTTFPADAPAGEYTFYCEPHRGAGMVGKITV
|
||||
>plas_proho
|
||||
VQIKMGTDKYAPLYEPKALSISAGDTVEFVMNKVGPHNVIFDK--VPAG-ES-APALSNTKLRIAPGSFYSVTLGT---PGTYSFYCTPHRGAGMVGTITV
|
||||
>azup_achcy
|
||||
VHMLNKGKDGAMVFEPASLKVAPGDTVTFIPTDK-GHNVETIKGMIPDG-AE-A-------FKSKINENYKVTFTA---PGVYGVKCTPHYGMGMVGVVEV
|
301
Tests/test_Align_fasta.py
Normal file
301
Tests/test_Align_fasta.py
Normal file
@ -0,0 +1,301 @@
|
||||
# Copyright 2006-2014 by Peter Cock. All rights reserved.
|
||||
# Copyright 2022 by Michiel de Hoon. All rights reserved.
|
||||
# This code is part of the Biopython distribution and governed by its
|
||||
# license. Please see the LICENSE file that should have been included
|
||||
# as part of this package.
|
||||
"""Tests for Bio.Align.fasta module."""
|
||||
import unittest
|
||||
import warnings
|
||||
|
||||
from io import StringIO
|
||||
|
||||
from Bio import BiopythonExperimentalWarning
|
||||
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore", BiopythonExperimentalWarning)
|
||||
from Bio.Align.fasta import AlignmentIterator
|
||||
from Bio.Align.fasta import AlignmentWriter
|
||||
|
||||
|
||||
class TestFASTAReadingWriting(unittest.TestCase):
    """Parse aligned FASTA files, check their contents, and round-trip them."""

    def _read_single_alignment(self, path):
        """Return the first alignment in path; assert that no second one follows."""
        with open(path) as handle:
            parser = AlignmentIterator(handle)
            parsed = next(parser)
            with self.assertRaises(StopIteration):
                next(parser)
        return parsed

    def _check_contents(self, alignment, names, sequences, rows):
        """Compare the record ids, then the ungapped sequences, then the gapped rows."""
        for index, name in enumerate(names):
            self.assertEqual(alignment.sequences[index].id, name)
        for index, sequence in enumerate(sequences):
            self.assertEqual(alignment.sequences[index].seq, sequence)
        for index, row in enumerate(rows):
            self.assertEqual(alignment[index], row)

    def check_reading_writing(self, path):
        """Write the alignment in path to memory, re-read it, and compare both."""
        source = AlignmentIterator(path)
        buffer = StringIO()
        count = AlignmentWriter(buffer).write_file(source, mincount=1, maxcount=1)
        self.assertEqual(count, 1)
        original = next(AlignmentIterator(path))
        buffer.seek(0)
        reparsed = AlignmentIterator(buffer)
        copy = next(reparsed)
        with self.assertRaises(StopIteration):
            next(reparsed)
        self.assertEqual(len(original), len(copy))
        pairs = zip(original.sequences, copy.sequences)
        for row, (expected, actual) in enumerate(pairs):
            self.assertEqual(expected.id, actual.id)
            self.assertEqual(expected.seq, actual.seq)
            self.assertEqual(original[row], copy[row])

    def test_clustalw(self):
        path = "Clustalw/clustalw.fa"
        alignment = self._read_single_alignment(path)
        self.assertEqual(
            repr(alignment),
            "<Bio.Align.Alignment object (2 rows x 601 columns) at 0x%x>"
            % id(alignment),
        )
        self.assertEqual(len(alignment), 2)
        self._check_contents(
            alignment,
            names=[
                "gi|4959044|gb|AAD34209.1|AF069",
                "gi|671626|emb|CAA85685.1|",
            ],
            sequences=[
                "MENSDSNDKGSDQSAAQRRSQMDRLDREEAFYQFVNNLSEEDYRLMRDNNLLGTPGESTEEELLRRLQQIKEGPPPQSPDENRAGESSDDVTNSDSIIDWLNSVRQTGNTTRSRQRGNQSWRAVSRTNPNSGDFRFSLEINVNRNNGSQTSENESEPSTRRLSVENMESSSQRQMENSASESASARPSRAERNSTEAVTEVPTTRAQRRARSRSPEHRRTRARAERSMSPLQPTSEIPRRAPTLEQSSENEPEGSSRTRHHVTLRQQISGPELLGRGLFAASGSRNPSQGTSSSDTGSNSESSGSGQRPPTIVLDLQVRRVRPGEYRQRDSIASRTRSRSQAPNNTVTYESERGGFRRTFSRSERAGVRTYVSTIRIPIRRILNTGLSETTSVAIQTMLRQIMTGFGELSYFMYSDSDSEPSASVSSRNVERVESRNGRGSSGGGNSSGSSSSSSPSPSSSGESSESSSKMFEGSSEGGSSGPSRKDGRHRAPVTFDESGSLPFFSLAQFFLLNEDDEDQPRGLTKEQIDNLAMRSFGENDALKTCSVCITEYTEGDKLRKLPCSHEFHVHCIDRWLSENSTCPICRRAVLSSGNRESVV",
                "MSPQTETKASVGFKAGVKEYKLTYYTPEYETKDTDILAAFRVTPQPGVPPEEAGAAVAAESSTGTWTTVWTDGLTSLDRYKGRCYHIEPVPGEKDQCICYVAYPLDLFEEGSVTNMFTSIVGNVFGFKALRALRLEDLRIPVAYVKTFQGPPHGIQVERDKLNKYGRPLLGCTIKPKLGLSAKNYGRAVYECLRGGLDFTKDDENVNSQPFMRWRDRFLFCAEAIYKAQAETGEIKGHYLNATAGTCEEMIKRAIFARELGVPIVMHDYLTGGFTANTSLAHYCRDNGLLLHIHRAMHAVIDRQKNHGMHFRVLAKALRLSGGDHIHSGTVVGKLEGERDITLGFVDLLRDDFIEKDRSRGIYFTQDWVSLPGVIPVASGGIHVWHMPALTEIFGDDSVLQFGGGTLGHPWGNAPGAVANRVAVEACVKARNEGRDLAAEGNAIIREACKWSPELAAACEVWKEIKFEFPAMD",
            ],
            rows=[
                "MENSDSNDKGSDQSAAQRRSQMDRLDREEAFYQFVNNLSEEDYRLMRDNNLLGTPGESTEEELLRRLQQIKEGPPPQSPDENRAGESSDDVTNSDSIIDWLNSVRQTGNTTRSRQRGNQSWRAVSRTNPNSGDFRFSLEINVNRNNGSQTSENESEPSTRRLSVENMESSSQRQMENSASESASARPSRAERNSTEAVTEVPTTRAQRRARSRSPEHRRTRARAERSMSPLQPTSEIPRRAPTLEQSSENEPEGSSRTRHHVTLRQQISGPELLGRGLFAASGSRNPSQGTSSSDTGSNSESSGSGQRPPTIVLDLQVRRVRPGEYRQRDSIASRTRSRSQAPNNTVTYESERGGFRRTFSRSERAGVRTYVSTIRIPIRRILNTGLSETTSVAIQTMLRQIMTGFGELSYFMYSDSDSEPSASVSSRNVERVESRNGRGSSGGGNSSGSSSSSSPSPSSSGESSESSSKMFEGSSEGGSSGPSRKDGRHRAPVTFDESGSLPFFSLAQFFLLNEDDEDQPRGLTKEQIDNLAMRSFGENDALKTCSVCITEYTEGDKLRKLPCSHEFHVHCIDRWLSE-NSTCPICRRAVLSSGNRESVV",
                "---------MSPQTETKASVGFKAGVKEYKLTYYTPEYETKDTDILAAFRVTPQPG-----------------VPPEEAGAAVAAESSTGT---------WTTVWTDGLTSLDRYKG-----RCYHIEPVPG-------------------EKDQCICYVAYPLDLFEEGSVTNMFTSIVGNVFGFKALRALRLEDLRIPVAYVKTFQGPPHGIQVERDKLNKYGRPLLGCTIKPKLGLSAKNYGRAVYECLRGGLDFTKDDENVNSQPFMRWRDRFLFCAEAIYKAQAETGEIKGHYLNATAG-----------------------TCEEMIKRAIFARELGVPIVMHDYLTGGFTANTSLAHYCRDNGLLLHIHRAMHAVIDRQKNHGMHFRVLAKALRLSGGDHIHSGTVVGKLEGERDITLGFVDLLRDDFIEKDRSRGIYFTQDWVSLPGVIPVASG-----------------------------GIHVWHMPALTEIFGDDSVLQFGGGTLGHPWGNAPGAVANRVA-----------VEACVKARNEG---RDLAAEGNAIIREACKWSPELAAACEVWKEIKFEFPAMD---",
            ],
        )
        self.check_reading_writing(path)

    def test_msaprobs(self):
        path = "Clustalw/msaprobs.fa"
        # This example was obtained from
        # http://virgil.ruc.dk/kurser/Sekvens/Treedraw.htm
        # and converted to aligned FASTA format.
        alignment = self._read_single_alignment(path)
        self.assertEqual(
            repr(alignment),
            "<Bio.Align.Alignment object (8 rows x 298 columns) at 0x%x>"
            % id(alignment),
        )
        self.assertEqual(len(alignment), 8)
        self.assertEqual(alignment.shape, (8, 298))
        self._check_contents(
            alignment,
            names=[
                "V_Harveyi_PATH",
                "B_subtilis_YXEM",
                "FLIY_ECOLI",
                "Deinococcus_radiodurans",
                "B_subtilis_GlnH_homo_YCKK",
                "YA80_HAEIN",
                "E_coli_GlnH",
                "HISJ_E_COLI",
            ],
            sequences=[
                "MKNWIKVAVAAIALSAATVQAATEVKVGMSGRYFPFTFVKQDKLQGFEVDMWDEIGKRNDYKIEYVTANFSGLFGLLETGRIDTISNQITMTDARKAKYLFADPYVVDGAQITVRKGNDSIQGVEDLAGKTVAVNLGSNFEQLLRDYDKDGKINIKTYDTGIEHDVALGRADAFIMDRLSALELIKKTGLPLQLAGEPFETIQNAWPFVDNEKGRKLQAEVNKALAEMRADGTVEKISVKWFGADITK",
                "MKMKKWTVLVVAALLAVLSACGNGNSSSKEDDNVLHVGATGQSYPFAYKENGKLTGFDVEVMEAVAKKIDMKLDWKLLEFSGLMGELQTGKLDTISNQVAVTDERKETYNFTKPYAYAGTQIVVKKDNTDIKSVDDLKGKTVAAVLGSNHAKNLESKDPDKKINIKTYETQEGTLKDVAYGRVDAYVNSRTVLIAQIKKTGLPLKLAGDPIVYEQVAFPFAKDDAHDKLRKKVNKALDELRKDGTLKKLSEKYFNEDITVEQKH",
                "MKLAHLGRQALMGVMAVALVAGMSVKSFADEGLLNKVKERGTLLVGLEGTYPPFSFQGDDGKLTGFEVEFAQQLAKHLGVEASLKPTKWDGMLASLDSKRIDVVINQVTISDERKKKYDFSTPYTISGIQALVKKGNEGTIKTADDLKGKKVGVGLGTNYEEWLRQNVQGVDVRTYDDDPTKYQDLRVGRIDAILVDRLAALDLVKKTNDTLAVTGEAFSRQESGVALRKGNEDLLKAVNDAIAEMQKDGTLQALSEKWFGADVTK",
                "MKKSLLSLKLSGLLVPSVLALSLSACSSPSSTLNQGTLKIAMEGTYPPFTSKNEQGELVGFDVDIAKAVAQKLNLKPEFVLTEWSGILAGLQANKYDVIVNQVGITPERQNSIGFSQPYAYSRPEIIVAKNNTFNPQSLADLKGKRVGSTLGSNYEKQLIDTGDIKIVTYPGAPEILADLVAGRIDAAYNDRLVVNYIINDQKLPVRGAGQIGDAAPVGIALKKGNSALKDQIDKALTEMRSDGTFEKISQKWFGQDVGQP",
                "MKKALLALFMVVSIAALAACGAGNDNQSKDNAKDGDLWASIKKKGVLTVGTEGTYEPFTYHDKDTDKLTGYDVEVITEVAKRLGLKVDFKETQWGSMFAGLNSKRFDVVANQVGKTDREDKYDFSDKYTTSRAVVVTKKDNNDIKSEADVKGKTSAQSLTSNYNKLATNAGAKVEGVEGMAQALQMIQQARVDMTYNDKLAVLNYLKTSGNKNVKIAFETGEPQSTYFTFRKGSGEVVDQVNKALKEMKEDGTLSKISKKWFGEDVSK",
                "MKKLLFTTALLTGAIAFSTFSHAGEIADRVEKTKTLLVGTEGTYAPFTFHDKSGKLTGFDVEVIRKVAEKLGLKVEFKETQWDAMYAGLNAKRFDVIANQTNPSPERLKKYSFTTPYNYSGGVIVTKSSDNSIKSFEDLKGRKSAQSATSNWGKDAKAAGAQILVVDGLAQSLELIKQGRAEATINDKLAVLDYFKQHPNSGLKIAYDRGDKTPTAFAFLQGEDALITKFNQVLEALRQDGTLKQISIEWFGYDITQ",
                "MKSVLKVSLAALTLAFAVSSHAADKKLVVATDTAFVPFEFKQGDKYVGFDVDLWAAIAKELKLDYELKPMDFSGIIPALQTKNVDLALAGITITDERKKAIDFSDGYYKSGLLVMVKANNNDVKSVKDLDGKVVAVKSGTGSVDYAKANIKTKDLRQFPNIDNAYMELGTNRADAVLHDTPNILYFIKTAGNGQFKAVGDSLEAQQYGIAFPKGSDELRDKVNGALKTLRENGTYNEIYKKWFGTEPK",
                "MKKLVLSLSLVLAFSSATAAFAAIPQNIRIGTDPTYAPFESKNSQGELVGFDIDLAKELCKRINTQCTFVENPLDALIPSLKAKKIDAIMSSLSITEKRQQEIAFTDKLYAADSRLVVAKNSDIQPTVESLKGKRVGVLQGTTQETFGNEHWAPKGIEIVSYQGQDNIYSDLTAGRIDAAFQDEVAASEGFLKQPVGKDYKFGGPSVKDEKLFGVGTGMGLRKEDNELREALNKAFAEMRADGTYEKLAKKYFDFDVYGG",
            ],
            rows=[
                "MKNW--------IKV----AVAAI-A--LSAA-------------------TVQAATEVKVGMSGRYFPFTFVK--QDKLQGFEVDMWDEIGKRNDYKIEYVTANFSGLFGLLETGRIDTISNQITMTDARKAKYLFADPYVVDGAQITVRK-GNDSIQGVEDLAGKTVAVNLGSNFEQLLRDYDKDGKINIKTYDT--GIEHDVALGRADAFIMDRLSALE-LIKKTG-LPLQLAGEPFE-----TIQNAWPFVDNEKGRKLQAEVNKALAEMRADGTVEKISVKWFGADITK----",
                "MKMKKW------TVL----VVAALLA-VLSACGN------------G-NSSSKEDDNVLHVGATGQSYPFAYKE--NGKLTGFDVEVMEAVAKKIDMKLDWKLLEFSGLMGELQTGKLDTISNQVAVTDERKETYNFTKPYAYAGTQIVVKK-DNTDIKSVDDLKGKTVAAVLGSNHAKNLESKDPDKKINIKTYETQEGTLKDVAYGRVDAYVNSRTVLIA-QIKKTG-LPLKLAGDPIV-----YEQVAFPFAKDDAHDKLRKKVNKALDELRKDGTLKKLSEKYFNEDITVEQKH",
                "MKLAHLGRQALMGVM----AVALVAG--MSVKSF---------ADEG-LLNKVKERGTLLVGLEGTYPPFSFQGD-DGKLTGFEVEFAQQLAKHLGVEASLKPTKWDGMLASLDSKRIDVVINQVTISDERKKKYDFSTPYTISGIQALVKKGNEGTIKTADDLKGKKVGVGLGTNYEEWLRQN--VQGVDVRTYDDDPTKYQDLRVGRIDAILVDRLAALD-LVKKTN-DTLAVTGEAFS-----RQESGVALRK--GNEDLLKAVNDAIAEMQKDGTLQALSEKWFGADVTK----",
                "MKKSLL------SLKLSGLLVPSVLALSLSACSS---------------PSSTLNQGTLKIAMEGTYPPFTSKNE-QGELVGFDVDIAKAVAQKLNLKPEFVLTEWSGILAGLQANKYDVIVNQVGITPERQNSIGFSQPYAYSRPEIIVAKNNTFNPQSLADLKGKRVGSTLGSNYEKQLI-D--TGDIKIVTYPGAPEILADLVAGRIDAAYNDRLVVNY-IIND-QKLPVRGAGQIGD-----AAPVGIALKK--GNSALKDQIDKALTEMRSDGTFEKISQKWFGQDVGQ---P",
                "MKKALL------ALF----MVVSIAA--LAACGAGNDNQSKDNAKDGDLWASIKKKGVLTVGTEGTYEPFTYHDKDTDKLTGYDVEVITEVAKRLGLKVDFKETQWGSMFAGLNSKRFDVVANQVGKTD-REDKYDFSDKYTTSRAVVVTKK-DNNDIKSEADVKGKTSAQSLTSNYNKLAT-N--A-GAKVEGVEGMAQALQMIQQARVDMTYNDKLAVLN-YLKTSGNKNVKIAFETGE-----PQSTYFTFRK--GSGEVVDQVNKALKEMKEDGTLSKISKKWFGEDVSK----",
                "MKKLLF------TTA----LLTGAIA--FSTFS-----------HAGEIADRVEKTKTLLVGTEGTYAPFTFHDK-SGKLTGFDVEVIRKVAEKLGLKVEFKETQWDAMYAGLNAKRFDVIANQTNPSPERLKKYSFTTPYNYSGGVIVTKS-SDNSIKSFEDLKGRKSAQSATSNWGKDAK-A--A-GAQILVVDGLAQSLELIKQGRAEATINDKLAVLD-YFKQHPNSGLKIAYDRGD-----KTPTAFAFLQ--GEDALITKFNQVLEALRQDGTLKQISIEWFGYDITQ----",
                "MKSVL-------KVS----LAALTLA--FAVSSH---------A----------ADKKLVVATDTAFVPFEFKQ--GDKYVGFDVDLWAAIAKELKLDYELKPMDFSGIIPALQTKNVDLALAGITITDERKKAIDFSDGYYKSGLLVMVKAN-NNDVKSVKDLDGKVVAVKSGTGSVDYAKAN--IKTKDLRQFPNIDNAYMELGTNRADAVLHDTPNILY-FIKTAGNGQFKAVGDSLE-----AQQYGIAFPK--GSDELRDKVNGALKTLRENGTYNEIYKKWFGTEP-K----",
                "MKKLVL------SLS----LV---LA--FSSATA---------------A-FAAIPQNIRIGTDPTYAPFESKNS-QGELVGFDIDLAKELCKRINTQCTFVENPLDALIPSLKAKKIDAIMSSLSITEKRQQEIAFTDKLYAADSRLVVAK-NSDIQPTVESLKGKRVGVLQGTTQETFGNEHWAPKGIEIVSYQGQDNIYSDLTAGRIDAAFQDEVAASEGFLKQPVGKDYKFGGPSVKDEKLFGVGTGMGLRK--EDNELREALNKAFAEMRADGTYEKLAKKYFDFDVYG---G",
            ],
        )
        self.check_reading_writing(path)

    def test_muscle(self):
        path = "Clustalw/muscle.fa"
        alignment = self._read_single_alignment(path)
        self.assertEqual(
            repr(alignment),
            "<Bio.Align.Alignment object (3 rows x 687 columns) at 0x%x>"
            % id(alignment),
        )
        self.assertEqual(len(alignment), 3)
        self._check_contents(
            alignment,
            names=[
                "Test1seq",
                "AT3G20900.1-SEQ",
                "AT3G20900.1-CDS",
            ],
            sequences=[
                "AGTTACAATAACTGACGAAGCTAAGTAGGCTACTAATTAACGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGTAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATCGTATTCCGGTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAATAAATTAGCGCCAAAATAATGAAAAAAATAATAACAAACAAAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGCTGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGATCCTATATCAACGTAAACAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTTCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACGGTCGCTAGAGAAACTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCGTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTT",
                "ATGAACAAAGTAGCGAGGAAGAACAAAACATCAGGTGAACAAAAAAAAAACTCAATCCACATCAAAGTTACAATAACTGACGAAGCTAAGTAGGCTAGAAATTAAAGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGCAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATAGTTGATTTTTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAAAAAACAAGCGCCAAAATAATGAAAAAAATAATAACAAAAACAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGATGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGATCCTATATCAAAAAAAAAAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTGCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAG",
                "ATGAACAAAGTAGCGAGGAAGAACAAAACATCAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAG",
            ],
            rows=[
                "-----------------------------------------------------------------AGTTACAATAACTGACGAAGCTAAGTAGGCTACTAATTAACGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGTAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATCGTATTCCGGTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAATAAATTAGCGCCAAAATAATGAAAAAAATAATAACAAACAAAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGCTGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGATCCTATATCAACGTAAACAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTTCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACGGTCGCTAGAGAAACTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCGTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTT-",
                "ATGAACAAAGTAGCGAGGAAGAACAAAACATCAGGTGAACAAAAAAAAAACTCAATCCACATCAAAGTTACAATAACTGACGAAGCTAAGTAGGCTAGAAATTAAAGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGCAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATAGTTGATTTTTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAAAAAACAAGCGCCAAAATAATGAAAAAAATAATAACAAAAACAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGATGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGATCCTATATCAAAAAAAAAAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTGCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAG",
                "--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ATGAACAAAGTAGCGAGGAAGAA------------------------------CAAAACATC----------------------------------------------------------------------------------------------------------------------------------------------------------------------------AGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAG",
            ],
        )
        self.check_reading_writing(path)

    def test_kalign(self):
        path = "Clustalw/kalign.fa"
        alignment = self._read_single_alignment(path)
        self.assertEqual(
            repr(alignment),
            "<Bio.Align.Alignment object (2 rows x 27 columns) at 0x%x>"
            % id(alignment),
        )
        self.assertEqual(len(alignment), 2)
        self._check_contents(
            alignment,
            names=["Test1seq", "AT3G20900"],
            sequences=[
                "GCTGGGGATGGAGAGGGAACAGAGTT",
                "GCTGGGGATGGAGAGGGAACAGAGTAG",
            ],
            rows=[
                "GCTGGGGATGGAGAGGGAACAGAGT-T",
                "GCTGGGGATGGAGAGGGAACAGAGTAG",
            ],
        )
        self.check_reading_writing(path)

    def test_probcons(self):
        path = "Clustalw/probcons.fa"
        # example taken from the PROBCONS documentation,
        # and converted to aligned FASTA format.
        alignment = self._read_single_alignment(path)
        self.assertEqual(
            repr(alignment),
            "<Bio.Align.Alignment object (5 rows x 101 columns) at 0x%x>"
            % id(alignment),
        )
        self.assertEqual(len(alignment), 5)
        self._check_contents(
            alignment,
            names=[
                "plas_horvu",
                "plas_chlre",
                "plas_anava",
                "plas_proho",
                "azup_achcy",
            ],
            sequences=[
                "DVLLGANGGVLVFEPNDFSVKAGETITFKNNAGYPHNVVFDEDAVPSGVDVSKISQEEYLTAPGETFSVTLTVPGTYGFYCEPHAGAGMVGKVTV",
                "VKLGADSGALEFVPKTLTIKSGETVNFVNNAGFPHNIVFDEDAIPSGVNADAISRDDYLNAPGETYSVKLTAAGEYGYYCEPHQGAGMVGKIIV",
                "VKLGSDKGLLVFEPAKLTIKPGDTVEFLNNKVPPHNVVFDAALNPAKSADLAKSLSHKQLLMSPGQSTSTTFPADAPAGEYTFYCEPHRGAGMVGKITV",
                "VQIKMGTDKYAPLYEPKALSISAGDTVEFVMNKVGPHNVIFDKVPAGESAPALSNTKLRIAPGSFYSVTLGTPGTYSFYCTPHRGAGMVGTITV",
                "VHMLNKGKDGAMVFEPASLKVAPGDTVTFIPTDKGHNVETIKGMIPDGAEAFKSKINENYKVTFTAPGVYGVKCTPHYGMGMVGVVEV",
            ],
            rows=[
                "D-VLLGANGGVLVFEPNDFSVKAGETITFKNNAGYPHNVVFDEDAVPSG-VD-VSKISQEEYLTAPGETFSVTLTV---PGTYGFYCEPHAGAGMVGKVTV",
                "--VKLGADSGALEFVPKTLTIKSGETVNFVNNAGFPHNIVFDEDAIPSG-VN-ADAISRDDYLNAPGETYSVKLTA---AGEYGYYCEPHQGAGMVGKIIV",
                "--VKLGSDKGLLVFEPAKLTIKPGDTVEFLNNKVPPHNVVFDAALNPAKSADLAKSLSHKQLLMSPGQSTSTTFPADAPAGEYTFYCEPHRGAGMVGKITV",
                "VQIKMGTDKYAPLYEPKALSISAGDTVEFVMNKVGPHNVIFDK--VPAG-ES-APALSNTKLRIAPGSFYSVTLGT---PGTYSFYCTPHRGAGMVGTITV",
                "VHMLNKGKDGAMVFEPASLKVAPGDTVTFIPTDK-GHNVETIKGMIPDG-AE-A-------FKSKINENYKVTFTA---PGVYGVKCTPHYGMGMVGVVEV",
            ],
        )
        self.check_reading_writing(path)

    def test_empty(self):
        """Checking empty file."""
        parser = AlignmentIterator(StringIO())
        with self.assertRaises(ValueError):
            next(parser)
|
||||
|
||||
|
||||
# Run the tests with per-test (verbose) output when executed as a script.
if __name__ == "__main__":
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))
|
Reference in New Issue
Block a user