mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 21:53:47 +08:00
$ ruff check --fix --select=I \ --config=lint.isort.force-single-line=true \ --config=lint.isort.order-by-type=false \ BioSQL/ Bio/ Tests/ Scripts/ Doc/ setup.py Using ruff version 0.4.10
291 lines
14 KiB
Python
291 lines
14 KiB
Python
# Copyright 2006-2014 by Peter Cock. All rights reserved.
|
|
# This code is part of the Biopython distribution and governed by its
|
|
# license. Please see the LICENSE file that should have been included
|
|
# as part of this package.
|
|
"""Tests for Bio.AlignIO.ClustalIO module."""
|
|
import unittest
|
|
from io import StringIO
|
|
|
|
from Bio.AlignIO.ClustalIO import ClustalIterator
|
|
from Bio.AlignIO.ClustalIO import ClustalWriter
|
|
|
|
# This is a truncated version of the example in Tests/clustalw.aln
|
|
# Notice the inclusion of sequence numbers (right hand side)
|
|
aln_example1 = """\
|
|
CLUSTAL W (1.81) multiple sequence alignment
|
|
|
|
|
|
gi|4959044|gb|AAD34209.1|AF069 MENSDSNDKGSDQSAAQRRSQMDRLDREEAFYQFVNNLSEEDYRLMRDNN 50
|
|
gi|671626|emb|CAA85685.1| ---------MSPQTETKASVGFKAGVKEYKLTYYTPEYETKDTDILAAFR 41
|
|
* *: :: :. :* : :. : . :* :: .
|
|
|
|
gi|4959044|gb|AAD34209.1|AF069 LLGTPGESTEEELLRRLQQIKEGPPPQSPDENRAGESSDDVTNSDSIIDW 100
|
|
gi|671626|emb|CAA85685.1| VTPQPG-----------------VPPEEAGAAVAAESSTGT--------- 65
|
|
: ** **:... *.*** ..
|
|
|
|
gi|4959044|gb|AAD34209.1|AF069 LNSVRQTGNTTRSRQRGNQSWRAVSRTNPNSGDFRFSLEINVNRNNGSQT 150
|
|
gi|671626|emb|CAA85685.1| WTTVWTDGLTSLDRYKG-----RCYHIEPVPG------------------ 92
|
|
.:* * *: .* :* : :* .*
|
|
|
|
gi|4959044|gb|AAD34209.1|AF069 SENESEPSTRRLSVENMESSSQRQMENSASESASARPSRAERNSTEAVTE 200
|
|
gi|671626|emb|CAA85685.1| -EKDQCICYVAYPLDLFEEGSVTNMFTSIVGNVFGFKALRALRLEDLRIP 141
|
|
*::. . .:: :*..* :* .* .. . : . :
|
|
|
|
gi|4959044|gb|AAD34209.1|AF069 VPTTRAQRRA 210
|
|
gi|671626|emb|CAA85685.1| VAYVKTFQGP 151
|
|
*. .:: : .
|
|
""" # noqa : W291
|
|
|
|
# This example is a truncated version of the dataset used here:
|
|
# http://virgil.ruc.dk/kurser/Sekvens/Treedraw.htm
|
|
# with the last record repeated twice (deliberate torture test)
|
|
aln_example2 = """\
|
|
CLUSTAL X (1.83) multiple sequence alignment
|
|
|
|
|
|
V_Harveyi_PATH --MKNWIKVAVAAIA--LSAA------------------TVQAATEVKVG
|
|
B_subtilis_YXEM MKMKKWTVLVVAALLAVLSACG------------NGNSSSKEDDNVLHVG
|
|
B_subtilis_GlnH_homo_YCKK MKKALLALFMVVSIAALAACGAGNDNQSKDNAKDGDLWASIKKKGVLTVG
|
|
YA80_HAEIN MKKLLFTTALLTGAIAFSTF-----------SHAGEIADRVEKTKTLLVG
|
|
FLIY_ECOLI MKLAHLGRQALMGVMAVALVAG---MSVKSFADEG-LLNKVKERGTLLVG
|
|
E_coli_GlnH --MKSVLKVSLAALTLAFAVS------------------SHAADKKLVVA
|
|
Deinococcus_radiodurans -MKKSLLSLKLSGLLVPSVLALS--------LSACSSPSSTLNQGTLKIA
|
|
HISJ_E_COLI MKKLVLSLSLVLAFSSATAAF-------------------AAIPQNIRIG
|
|
HISJ_E_COLI MKKLVLSLSLVLAFSSATAAF-------------------AAIPQNIRIG
|
|
: . : :.
|
|
|
|
V_Harveyi_PATH MSGRYFPFTFVKQ--DKLQGFEVDMWDEIGKRNDYKIEYVTANFSGLFGL
|
|
B_subtilis_YXEM ATGQSYPFAYKEN--GKLTGFDVEVMEAVAKKIDMKLDWKLLEFSGLMGE
|
|
B_subtilis_GlnH_homo_YCKK TEGTYEPFTYHDKDTDKLTGYDVEVITEVAKRLGLKVDFKETQWGSMFAG
|
|
YA80_HAEIN TEGTYAPFTFHDK-SGKLTGFDVEVIRKVAEKLGLKVEFKETQWDAMYAG
|
|
FLIY_ECOLI LEGTYPPFSFQGD-DGKLTGFEVEFAQQLAKHLGVEASLKPTKWDGMLAS
|
|
E_coli_GlnH TDTAFVPFEFKQG--DKYVGFDVDLWAAIAKELKLDYELKPMDFSGIIPA
|
|
Deinococcus_radiodurans MEGTYPPFTSKNE-QGELVGFDVDIAKAVAQKLNLKPEFVLTEWSGILAG
|
|
HISJ_E_COLI TDPTYAPFESKNS-QGELVGFDIDLAKELCKRINTQCTFVENPLDALIPS
|
|
HISJ_E_COLI TDPTYAPFESKNS-QGELVGFDIDLAKELCKRINTQCTFVENPLDALIPS
|
|
** .: *::::. : :. . ..:
|
|
|
|
V_Harveyi_PATH LETGRIDTISNQITMTDARKAKYLFADPYVVDG-AQI
|
|
B_subtilis_YXEM LQTGKLDTISNQVAVTDERKETYNFTKPYAYAG-TQI
|
|
B_subtilis_GlnH_homo_YCKK LNSKRFDVVANQVG-KTDREDKYDFSDKYTTSR-AVV
|
|
YA80_HAEIN LNAKRFDVIANQTNPSPERLKKYSFTTPYNYSG-GVI
|
|
FLIY_ECOLI LDSKRIDVVINQVTISDERKKKYDFSTPYTISGIQAL
|
|
E_coli_GlnH LQTKNVDLALAGITITDERKKAIDFSDGYYKSG-LLV
|
|
Deinococcus_radiodurans LQANKYDVIVNQVGITPERQNSIGFSQPYAYSRPEII
|
|
HISJ_E_COLI LKAKKIDAIMSSLSITEKRQQEIAFTDKLYAADSRLV
|
|
HISJ_E_COLI LKAKKIDAIMSSLSITEKRQQEIAFTDKLYAADSRLV
|
|
*.: . * . * *: :
|
|
|
|
""" # noqa : W291
|
|
|
|
|
|
aln_example3 = """\
|
|
CLUSTAL 2.0.9 multiple sequence alignment
|
|
|
|
|
|
Test1seq ------------------------------------------------------------
|
|
AT3G20900.1-SEQ ATGAACAAAGTAGCGAGGAAGAACAAAACATCAGGTGAACAAAAAAAAAACTCAATCCAC
|
|
AT3G20900.1-CDS ------------------------------------------------------------
|
|
|
|
|
|
Test1seq -----AGTTACAATAACTGACGAAGCTAAGTAGGCTACTAATTAACGTCATCAACCTAAT
|
|
AT3G20900.1-SEQ ATCAAAGTTACAATAACTGACGAAGCTAAGTAGGCTAGAAATTAAAGTCATCAACCTAAT
|
|
AT3G20900.1-CDS ------------------------------------------------------------
|
|
|
|
|
|
Test1seq ACATAGCACTTAGAAAAAAGTGAAGTAAGAAAATATAAAATAATAAAAGGGTGGGTTATC
|
|
AT3G20900.1-SEQ ACATAGCACTTAGAAAAAAGTGAAGCAAGAAAATATAAAATAATAAAAGGGTGGGTTATC
|
|
AT3G20900.1-CDS ------------------------------------------------------------
|
|
|
|
|
|
Test1seq AATTGATAGTGTAAATCATCGTATTCCGGTGATATACCCTACCACAAAAACTCAAACCGA
|
|
AT3G20900.1-SEQ AATTGATAGTGTAAATCATAGTTGATTTTTGATATACCCTACCACAAAAACTCAAACCGA
|
|
AT3G20900.1-CDS ------------------------------------------------------------
|
|
|
|
|
|
Test1seq CTTGATTCAAATCATCTCAATAAATTAGCGCCAAAATAATGAAAAAAATAATAACAAACA
|
|
AT3G20900.1-SEQ CTTGATTCAAATCATCTCAAAAAACAAGCGCCAAAATAATGAAAAAAATAATAACAAAAA
|
|
AT3G20900.1-CDS ------------------------------------------------------------
|
|
|
|
|
|
Test1seq AAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATT
|
|
AT3G20900.1-SEQ CAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATT
|
|
AT3G20900.1-CDS ------------------------------------------------------------
|
|
|
|
|
|
Test1seq GTATTAACAAATCAAAGAGCTGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGAT
|
|
AT3G20900.1-SEQ GTATTAACAAATCAAAGAGATGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGAT
|
|
AT3G20900.1-CDS ------------------------------------------------------------
|
|
|
|
|
|
Test1seq CCTATATCAACGTAAACAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGT
|
|
AT3G20900.1-SEQ CCTATATCAAAAAAAAAAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGT
|
|
AT3G20900.1-CDS ------------------------------------------------------ATGAAC
|
|
*
|
|
|
|
Test1seq TCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGT
|
|
AT3G20900.1-SEQ GCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGT
|
|
AT3G20900.1-CDS AAAGTAGCGAGGAAGAACAAAACATC------AGCAAAGAAAACGATCTGTCTCCGTCGT
|
|
* *** ***** * * ** ****************************
|
|
|
|
Test1seq AACACACGGTCGCTAGAGAAACTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCC
|
|
AT3G20900.1-SEQ AACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCC
|
|
AT3G20900.1-CDS AACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCC
|
|
******* ** * **** ***************************************
|
|
|
|
Test1seq GGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCGTGGTGACGTCAGCACCGCT
|
|
AT3G20900.1-SEQ GGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCT
|
|
AT3G20900.1-CDS GGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCT
|
|
**************************************** *******************
|
|
|
|
Test1seq GCTGGGGATGGAGAGGGAACAGAGTT-
|
|
AT3G20900.1-SEQ GCTGGGGATGGAGAGGGAACAGAGTAG
|
|
AT3G20900.1-CDS GCTGGGGATGGAGAGGGAACAGAGTAG
|
|
*************************
|
|
""" # noqa : W291
|
|
|
|
aln_example4 = """\
|
|
Kalign (2.0) alignment in ClustalW format
|
|
|
|
Test1seq GCTGGGGATGGAGAGGGAACAGAGTT-
|
|
AT3G20900.1-SEQ GCTGGGGATGGAGAGGGAACAGAGTAG
|
|
|
|
"""
|
|
|
|
aln_example5 = """\
|
|
Biopython 1.80.dev0 multiple sequence alignment
|
|
|
|
|
|
gi|4959044|gb|AAD34209.1|AF069 ------------MENSDSNDKGSDQSAAQRRSQMDRLDREEAFYQFVNNL
|
|
gi|671626|emb|CAA85685.1| MSPQTETKASVGFKAGVKEYKLTYYTPEYETKDTDILAAFRVTPQPGVPP
|
|
|
|
gi|4959044|gb|AAD34209.1|AF069 SEEDYRLMRDNNLLGTPGESTEEELLRRLQQIKEGPPPQSPDENRAGESS
|
|
gi|671626|emb|CAA85685.1| -EEAGAAVAAESSTGTWTTVWTDGLTS-LDRYK-GRCYHI--EPVPGEKD
|
|
|
|
gi|4959044|gb|AAD34209.1|AF069 DDVTNSDSIIDWLNSVRQTGNTTRSRQRGNQSWRAVSRTNPNSGDFRFSL
|
|
gi|671626|emb|CAA85685.1| QCICYVAYPLDLFEEGSVTNMFT-SIV-GNVFGFKALRALRLE-DLRIPV
|
|
|
|
gi|4959044|gb|AAD34209.1|AF069 EINVNRNNGSQTSENESEPSTRRLSVENMESSSQRQMENSASESASARPS
|
|
gi|671626|emb|CAA85685.1| AY-VKTFQGPPHGIQVERDKLNKYGRPLLGCTIKPKLGLSAKNYGRAVYE
|
|
|
|
gi|4959044|gb|AAD34209.1|AF069 RAERNSTEAVTEVPTTRAQRRARSRSPEHRRTRARAERSMSPLQPTSEIP
|
|
gi|671626|emb|CAA85685.1| CL-RGGLDFTKDDENVNSQPFMRWRD---RFLFC-AEAIYKAQAETGEIK
|
|
|
|
gi|4959044|gb|AAD34209.1|AF069 RRAPTLEQSSENEPEGSSRTRHHVTLRQQISGPELLGRGLFAASGSRNPS
|
|
gi|671626|emb|CAA85685.1| GHYLNATAGTC-E-EMIKRAIFARELGVPIVMHDYLTGG-FTANTSLAHY
|
|
|
|
gi|4959044|gb|AAD34209.1|AF069 QGTSSSDTGSNSESSGSGQRPPTIVLDLQVRRVRPGEYRQRDSIASRTRS
|
|
gi|671626|emb|CAA85685.1| CRDNGLLLHIHRAMHAVIDRQKNHGMHFRVLAKALRLSGG-DHIHSGTVV
|
|
|
|
gi|4959044|gb|AAD34209.1|AF069 RSQAPNNTVTYESERGGFRRTFSRSERAGVRTYVSTIRIPIRRILNTGLS
|
|
gi|671626|emb|CAA85685.1| GKLEGERDITLGFVDLL-RDDFIEKDRSRGI-YF-TQDWVSLPGVIPVAS
|
|
|
|
gi|4959044|gb|AAD34209.1|AF069 ETTSVAIQTMLRQIMTGFGELSYFMYSDSDSEPSAS--VSSRN-VER-VE
|
|
gi|671626|emb|CAA85685.1| GGIHVWHMPALTEIFGDDSVLQFGGGTLGHPWGNAPGAVANRVAVEACVK
|
|
|
|
gi|4959044|gb|AAD34209.1|AF069 SRN-GRGSSGGGNSSGSSSSS-SPSPSSSGESSESSSKMFEGSSEGGSSG
|
|
gi|671626|emb|CAA85685.1| ARNEGRDLAAEGNAIIREACKWSPELAAACEVWKEIKFEFPAMD------
|
|
|
|
gi|4959044|gb|AAD34209.1|AF069 PSRKDGRHRAPVTFDESGSLPFFSLAQFFLLNEDDEDQPRGLTKEQIDNL
|
|
gi|671626|emb|CAA85685.1| --------------------------------------------------
|
|
|
|
gi|4959044|gb|AAD34209.1|AF069 AMRSFGENDALKTCSVCITEYTEGDKLRKLPCSHEFHVHCIDRWLSENST
|
|
gi|671626|emb|CAA85685.1| --------------------------------------------------
|
|
|
|
gi|4959044|gb|AAD34209.1|AF069 CPICRRAVLSSGNRESVV
|
|
gi|671626|emb|CAA85685.1| ------------------
|
|
|
|
"""
|
|
|
|
|
|
class TestClustalIO(unittest.TestCase):
    """Parsing and round-trip tests for ClustalIterator and ClustalWriter."""

    def test_one(self):
        """Parse aln_example1 (one alignment, two rows, version 1.81)."""
        alignments = list(ClustalIterator(StringIO(aln_example1)))
        self.assertEqual(1, len(alignments))
        self.assertEqual(alignments[0]._version, "1.81")
        alignment = alignments[0]
        self.assertEqual(2, len(alignment))
        self.assertEqual(alignment[0].id, "gi|4959044|gb|AAD34209.1|AF069")
        self.assertEqual(alignment[1].id, "gi|671626|emb|CAA85685.1|")
        self.assertEqual(
            alignment[0].seq,
            "MENSDSNDKGSDQSAAQRRSQMDRLDREEAFYQFVNNLSEEDYRLMRDNN"
            "LLGTPGESTEEELLRRLQQIKEGPPPQSPDENRAGESSDDVTNSDSIIDW"
            "LNSVRQTGNTTRSRQRGNQSWRAVSRTNPNSGDFRFSLEINVNRNNGSQT"
            "SENESEPSTRRLSVENMESSSQRQMENSASESASARPSRAERNSTEAVTE"
            "VPTTRAQRRA",
        )

    def test_two(self):
        """Parse aln_example2 (nine rows, last record duplicated, version 1.83)."""
        alignments = list(ClustalIterator(StringIO(aln_example2)))
        self.assertEqual(1, len(alignments))
        self.assertEqual(alignments[0]._version, "1.83")
        alignment = alignments[0]
        self.assertEqual(9, len(alignment))
        self.assertEqual(alignment[-1].id, "HISJ_E_COLI")
        self.assertEqual(
            alignment[-1].seq,
            "MKKLVLSLSLVLAFSSATAAF-------------------AAIPQNIRIG"
            "TDPTYAPFESKNS-QGELVGFDIDLAKELCKRINTQCTFVENPLDALIPS"
            "LKAKKIDAIMSSLSITEKRQQEIAFTDKLYAADSRLV",
        )

    def test_cat_one_two(self):
        """Parse aln_example2 followed by aln_example1 from a single handle."""
        alignments = list(ClustalIterator(StringIO(aln_example2 + aln_example1)))
        self.assertEqual(2, len(alignments))
        self.assertEqual(9, len(alignments[0]))
        self.assertEqual(137, alignments[0].get_alignment_length())
        self.assertEqual(2, len(alignments[1]))
        self.assertEqual(210, alignments[1].get_alignment_length())

    def test_empty(self):
        """Checking empty file."""
        self.assertEqual(0, len(list(ClustalIterator(StringIO("")))))

    def test_write_read(self):
        """Checking write/read."""
        alignments = (
            list(ClustalIterator(StringIO(aln_example1)))
            + list(ClustalIterator(StringIO(aln_example2))) * 2
        )
        handle = StringIO()
        self.assertEqual(3, ClustalWriter(handle).write_file(alignments))
        handle.seek(0)
        reloaded = list(ClustalIterator(handle))
        # Check the count first: if the parser returned too few (or no)
        # alignments, the per-alignment loop below would pass vacuously.
        self.assertEqual(len(alignments), len(reloaded))
        for original, parsed in zip(alignments, reloaded):
            self.assertEqual(
                parsed.get_alignment_length(), original.get_alignment_length()
            )

    def test_write_read_single(self):
        """Testing write/read when there is only one sequence."""
        alignment = next(ClustalIterator(StringIO(aln_example1)))
        # Now take just the first row as a new alignment:
        alignment = alignment[0:1]
        handle = StringIO()
        ClustalWriter(handle).write_file([alignment])
        handle.seek(0)
        reloaded = list(ClustalIterator(handle))
        # Exactly one alignment was written, so exactly one must come back;
        # without this check an empty parse would silently pass.
        self.assertEqual(1, len(reloaded))
        for parsed in reloaded:
            self.assertEqual(
                parsed.get_alignment_length(), alignment.get_alignment_length()
            )
            self.assertEqual(len(parsed), 1)

    def test_three(self):
        """Parse aln_example3 (header version 2.0.9)."""
        alignments = list(ClustalIterator(StringIO(aln_example3)))
        self.assertEqual(1, len(alignments))
        self.assertEqual(alignments[0]._version, "2.0.9")

    def test_kalign_header(self):
        """Make sure we can parse the Kalign header."""
        alignment = next(ClustalIterator(StringIO(aln_example4)))
        self.assertEqual(2, len(alignment))

    def test_biopython_header(self):
        """Make sure we can parse the Biopython header."""
        alignment = next(ClustalIterator(StringIO(aln_example5)))
        self.assertEqual(2, len(alignment))
        self.assertEqual(alignment._version, "1.80.dev0")
|
|
|
|
|
|
if __name__ == "__main__":
    # Run this module's tests directly, reporting each test verbosely.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))
|