# Copyright 2006-2014 by Peter Cock. All rights reserved. # Copyright 2022 by Michiel de Hoon. All rights reserved. # This code is part of the Biopython distribution and governed by its # license. Please see the LICENSE file that should have been included # as part of this package. """Tests for Bio.Align.a2m module.""" import unittest from io import StringIO import numpy as np from Bio import Align from Bio.Align import substitution_matrices substitution_matrix = substitution_matrices.load("BLOSUM62") class TestA2MReadingWriting(unittest.TestCase): def check_reading_writing(self, path): alignments = Align.parse(path, "a2m") stream = StringIO() n = Align.write(alignments, stream, "a2m") self.assertEqual(n, 1) alignments = Align.parse(path, "a2m") alignment = next(alignments) stream.seek(0) saved_alignments = Align.parse(stream, "a2m") saved_alignment = next(saved_alignments) with self.assertRaises(StopIteration): next(saved_alignments) self.assertEqual(len(alignment), len(saved_alignment)) for i, (sequence, saved_sequence) in enumerate( zip(alignment.sequences, saved_alignment.sequences) ): self.assertEqual(sequence.id, saved_sequence.id) self.assertEqual(sequence.seq, saved_sequence.seq) self.assertEqual(alignment[i], saved_alignment[i]) def test_clustalw(self): path = "Clustalw/clustalw.a2m" with open(path) as stream: alignments = Align.parse(stream, "a2m") alignment = next(alignments) with self.assertRaises(StopIteration): next(alignments) self.check_clustalw(alignment) alignments = iter(alignments) alignment = next(alignments) with self.assertRaises(StopIteration): next(alignments) self.check_clustalw(alignment) with Align.parse(path, "a2m") as alignments: alignment = next(alignments) self.check_clustalw(alignment) with self.assertRaises(AttributeError): alignments._stream with Align.parse(path, "a2m") as alignments: pass with self.assertRaises(AttributeError): alignments._stream self.check_reading_writing(path) def check_clustalw(self, alignment): self.assertEqual( repr(alignment), "" % id(alignment), ) self.assertEqual(len(alignment), 2) self.assertEqual(alignment.sequences[0].id, "gi|4959044|gb|AAD34209.1|AF069") self.assertEqual(alignment.sequences[1].id, "gi|671626|emb|CAA85685.1|") self.assertEqual( alignment.sequences[0].seq, "MENSDSNDKGSDQSAAQRRSQMDRLDREEAFYQFVNNLSEEDYRLMRDNNLLGTPGESTEEELLRRLQQIKEGPPPQSPDENRAGESSDDVTNSDSIIDWLNSVRQTGNTTRSRQRGNQSWRAVSRTNPNSGDFRFSLEINVNRNNGSQTSENESEPSTRRLSVENMESSSQRQMENSASESASARPSRAERNSTEAVTEVPTTRAQRRARSRSPEHRRTRARAERSMSPLQPTSEIPRRAPTLEQSSENEPEGSSRTRHHVTLRQQISGPELLGRGLFAASGSRNPSQGTSSSDTGSNSESSGSGQRPPTIVLDLQVRRVRPGEYRQRDSIASRTRSRSQAPNNTVTYESERGGFRRTFSRSERAGVRTYVSTIRIPIRRILNTGLSETTSVAIQTMLRQIMTGFGELSYFMYSDSDSEPSASVSSRNVERVESRNGRGSSGGGNSSGSSSSSSPSPSSSGESSESSSKMFEGSSEGGSSGPSRKDGRHRAPVTFDESGSLPFFSLAQFFLLNEDDEDQPRGLTKEQIDNLAMRSFGENDALKTCSVCITEYTEGDKLRKLPCSHEFHVHCIDRWLSENSTCPICRRAVLSSGNRESVV", ) self.assertEqual( alignment.sequences[1].seq, "MSPQTETKASVGFKAGVKEYKLTYYTPEYETKDTDILAAFRVTPQPGVPPEEAGAAVAAESSTGTWTTVWTDGLTSLDRYKGRCYHIEPVPGEKDQCICYVAYPLDLFEEGSVTNMFTSIVGNVFGFKALRALRLEDLRIPVAYVKTFQGPPHGIQVERDKLNKYGRPLLGCTIKPKLGLSAKNYGRAVYECLRGGLDFTKDDENVNSQPFMRWRDRFLFCAEAIYKAQAETGEIKGHYLNATAGTCEEMIKRAIFARELGVPIVMHDYLTGGFTANTSLAHYCRDNGLLLHIHRAMHAVIDRQKNHGMHFRVLAKALRLSGGDHIHSGTVVGKLEGERDITLGFVDLLRDDFIEKDRSRGIYFTQDWVSLPGVIPVASGGIHVWHMPALTEIFGDDSVLQFGGGTLGHPWGNAPGAVANRVAVEACVKARNEGRDLAAEGNAIIREACKWSPELAAACEVWKEIKFEFPAMD", ) self.assertEqual( alignment[0], "MENSDSNDKGSDQSAAQRRSQMDRLDREEAFYQFVNNLSEEDYRLMRDNNLLGTPGESTEEELLRRLQQIKEGPPPQSPDENRAGESSDDVTNSDSIIDWLNSVRQTGNTTRSRQRGNQSWRAVSRTNPNSGDFRFSLEINVNRNNGSQTSENESEPSTRRLSVENMESSSQRQMENSASESASARPSRAERNSTEAVTEVPTTRAQRRARSRSPEHRRTRARAERSMSPLQPTSEIPRRAPTLEQSSENEPEGSSRTRHHVTLRQQISGPELLGRGLFAASGSRNPSQGTSSSDTGSNSESSGSGQRPPTIVLDLQVRRVRPGEYRQRDSIASRTRSRSQAPNNTVTYESERGGFRRTFSRSERAGVRTYVSTIRIPIRRILNTGLSETTSVAIQTMLRQIMTGFGELSYFMYSDSDSEPSASVSSRNVERVESRNGRGSSGGGNSSGSSSSSSPSPSSSGESSESSSKMFEGSSEGGSSGPSRKDGRHRAPVTFDESGSLPFFSLAQFFLLNEDDEDQPRGLTKEQIDNLAMRSFGENDALKTCSVCITEYTEGDKLRKLPCSHEFHVHCIDRWLSE-NSTCPICRRAVLSSGNRESVV", ) self.assertEqual( alignment[1], "---------MSPQTETKASVGFKAGVKEYKLTYYTPEYETKDTDILAAFRVTPQPG-----------------VPPEEAGAAVAAESSTGT---------WTTVWTDGLTSLDRYKG-----RCYHIEPVPG-------------------EKDQCICYVAYPLDLFEEGSVTNMFTSIVGNVFGFKALRALRLEDLRIPVAYVKTFQGPPHGIQVERDKLNKYGRPLLGCTIKPKLGLSAKNYGRAVYECLRGGLDFTKDDENVNSQPFMRWRDRFLFCAEAIYKAQAETGEIKGHYLNATAG-----------------------TCEEMIKRAIFARELGVPIVMHDYLTGGFTANTSLAHYCRDNGLLLHIHRAMHAVIDRQKNHGMHFRVLAKALRLSGGDHIHSGTVVGKLEGERDITLGFVDLLRDDFIEKDRSRGIYFTQDWVSLPGVIPVASG-----------------------------GIHVWHMPALTEIFGDDSVLQFGGGTLGHPWGNAPGAVANRVA-----------VEACVKARNEG---RDLAAEGNAIIREACKWSPELAAACEVWKEIKFEFPAMD---", ) self.assertEqual( alignment.column_annotations["state"], "DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDIDDDDDDDDDDDDDDDDDDDDD", ) self.assertEqual( str(alignment), """\ gi|495904 0 MENSDSNDKGSDQSAAQRRSQMDRLDREEAFYQFVNNLSEEDYRLMRDNNLLGTPGESTE 0 ---------.|.|..............|.............|............||---- gi|671626 0 ---------MSPQTETKASVGFKAGVKEYKLTYYTPEYETKDTDILAAFRVTPQPG---- gi|495904 60 EELLRRLQQIKEGPPPQSPDENRAGESSDDVTNSDSIIDWLNSVRQTGNTTRSRQRGNQS 60 -------------.||.......|.|||...---------...|...|.|...|..|--- gi|671626 47 -------------VPPEEAGAAVAAESSTGT---------WTTVWTDGLTSLDRYKG--- gi|495904 120 WRAVSRTNPNSGDFRFSLEINVNRNNGSQTSENESEPSTRRLSVENMESSSQRQMENSAS 120 --......|..|-------------------|...............|..|...|..|.. gi|671626 82 --RCYHIEPVPG-------------------EKDQCICYVAYPLDLFEEGSVTNMFTSIV gi|495904 180 ESASARPSRAERNSTEAVTEVPTTRAQRRARSRSPEHRRTRARAERSMSPLQPTSEIPRR 180 ....................|................|.......|.............. gi|671626 121 GNVFGFKALRALRLEDLRIPVAYVKTFQGPPHGIQVERDKLNKYGRPLLGCTIKPKLGLS gi|495904 240 APTLEQSSENEPEGSSRTRHHVTLRQQISGPELLGRGLFAASGSRNPSQGTSSSDTGSNS 240 |............|.....................|.||.|.........|......... gi|671626 181 AKNYGRAVYECLRGGLDFTKDDENVNSQPFMRWRDRFLFCAEAIYKAQAETGEIKGHYLN gi|495904 300 ESSGSGQRPPTIVLDLQVRRVRPGEYRQRDSIASRTRSRSQAPNNTVTYESERGGFRRTF 300 ...|-----------------------.......|...........|......|||.... gi|671626 241 ATAG-----------------------TCEEMIKRAIFARELGVPIVMHDYLTGGFTANT gi|495904 360 SRSERAGVRTYVSTIRIPIRRILNTGLSETTSVAIQTMLRQIMTGFGELSYFMYSDSDSE 360 |.............|.............................|....|.........| gi|671626 278 SLAHYCRDNGLLLHIHRAMHAVIDRQKNHGMHFRVLAKALRLSGGDHIHSGTVVGKLEGE gi|495904 420 PSASVSSRNVERVESRNGRGSSGGGNSSGSSSSSSPSPSSSGESSESSSKMFEGSSEGGS 420 ...........|........|.|........|.....|..||------------------ gi|671626 338 RDITLGFVDLLRDDFIEKDRSRGIYFTQDWVSLPGVIPVASG------------------ gi|495904 480 SGPSRKDGRHRAPVTFDESGSLPFFSLAQFFLLNEDDEDQPRGLTKEQIDNLAMRSFGEN 480 -----------.............|...............|.|.......|...------ gi|671626 380 -----------GIHVWHMPALTEIFGDDSVLQFGGGTLGHPWGNAPGAVANRVA------ gi|495904 540 DALKTCSVCITEYTEGDKLRKLPCSHEFHVHCIDRWLSE-NSTCPICRRAVLSSGNRESV 540 -----...|.....||---|.|.............|..|-...|..............-- gi|671626 423 -----VEACVKARNEG---RDLAAEGNAIIREACKWSPELAAACEVWKEIKFEFPAMD-- gi|495904 599 V 600 600 - 601 gi|671626 473 - 473 """, ) self.assertEqual( format(alignment, "a2m"), """\ >gi|4959044|gb|AAD34209.1|AF069 MENSDSNDKGSDQSAAQRRSQMDRLDREEAFYQFVNNLSEEDYRLMRDNNLLGTPGESTEEELLRRLQQIKEGPPPQSPDENRAGESSDDVTNSDSIIDWLNSVRQTGNTTRSRQRGNQSWRAVSRTNPNSGDFRFSLEINVNRNNGSQTSENESEPSTRRLSVENMESSSQRQMENSASESASARPSRAERNSTEAVTEVPTTRAQRRARSRSPEHRRTRARAERSMSPLQPTSEIPRRAPTLEQSSENEPEGSSRTRHHVTLRQQISGPELLGRGLFAASGSRNPSQGTSSSDTGSNSESSGSGQRPPTIVLDLQVRRVRPGEYRQRDSIASRTRSRSQAPNNTVTYESERGGFRRTFSRSERAGVRTYVSTIRIPIRRILNTGLSETTSVAIQTMLRQIMTGFGELSYFMYSDSDSEPSASVSSRNVERVESRNGRGSSGGGNSSGSSSSSSPSPSSSGESSESSSKMFEGSSEGGSSGPSRKDGRHRAPVTFDESGSLPFFSLAQFFLLNEDDEDQPRGLTKEQIDNLAMRSFGENDALKTCSVCITEYTEGDKLRKLPCSHEFHVHCIDRWLSE.NSTCPICRRAVLSSGNRESVV >gi|671626|emb|CAA85685.1| ---------MSPQTETKASVGFKAGVKEYKLTYYTPEYETKDTDILAAFRVTPQPG-----------------VPPEEAGAAVAAESSTGT---------WTTVWTDGLTSLDRYKG-----RCYHIEPVPG-------------------EKDQCICYVAYPLDLFEEGSVTNMFTSIVGNVFGFKALRALRLEDLRIPVAYVKTFQGPPHGIQVERDKLNKYGRPLLGCTIKPKLGLSAKNYGRAVYECLRGGLDFTKDDENVNSQPFMRWRDRFLFCAEAIYKAQAETGEIKGHYLNATAG-----------------------TCEEMIKRAIFARELGVPIVMHDYLTGGFTANTSLAHYCRDNGLLLHIHRAMHAVIDRQKNHGMHFRVLAKALRLSGGDHIHSGTVVGKLEGERDITLGFVDLLRDDFIEKDRSRGIYFTQDWVSLPGVIPVASG-----------------------------GIHVWHMPALTEIFGDDSVLQFGGGTLGHPWGNAPGAVANRVA-----------VEACVKARNEG---RDLAAEGNAIIREACKWSPElAAACEVWKEIKFEFPAMD--- """, ) counts = alignment.counts(substitution_matrix) self.assertEqual( repr(counts), "" % id(counts), ) self.assertEqual( str(counts), """\ AlignmentCounts object with substitution_score = -34.0, aligned = 472: identities = 64, positives = 126, mismatches = 408. gaps = 129: left_gaps = 9: left_insertions = 0: open_left_insertions = 0, extend_left_insertions = 0; left_deletions = 9: open_left_deletions = 1, extend_left_deletions = 8; internal_gaps = 117: internal_insertions = 1: open_internal_insertions = 1, extend_internal_insertions = 0; internal_deletions = 116: open_internal_deletions = 8, extend_internal_deletions = 108; right_gaps = 3: right_insertions = 0: open_right_insertions = 0, extend_right_insertions = 0; right_deletions = 3: open_right_deletions = 1, extend_right_deletions = 2. """, ) self.assertEqual(counts.left_insertions, 0) self.assertEqual(counts.left_deletions, 9) self.assertEqual(counts.right_insertions, 0) self.assertEqual(counts.right_deletions, 3) self.assertEqual(counts.internal_insertions, 1) self.assertEqual(counts.internal_deletions, 116) self.assertEqual(counts.left_gaps, 9) self.assertEqual(counts.right_gaps, 3) self.assertEqual(counts.internal_gaps, 117) self.assertEqual(counts.insertions, 1) self.assertEqual(counts.deletions, 128) self.assertEqual(counts.gaps, 129) self.assertEqual(counts.aligned, 472) self.assertEqual(counts.identities, 64) self.assertEqual(counts.mismatches, 408) self.assertEqual(counts.positives, 126) def test_msaprobs(self): path = "Clustalw/msaprobs.a2m" # This example was obtained from # http://virgil.ruc.dk/kurser/Sekvens/Treedraw.htm # and converted to A2M format. with open(path) as stream: alignments = Align.parse(stream, "a2m") alignment = next(alignments) with self.assertRaises(StopIteration): next(alignments) self.assertEqual( repr(alignment), "" % id(alignment), ) self.assertEqual(len(alignment), 8) self.assertEqual(alignment.shape, (8, 298)) self.assertEqual(alignment.sequences[0].id, "V_Harveyi_PATH") self.assertEqual(alignment.sequences[1].id, "B_subtilis_YXEM") self.assertEqual(alignment.sequences[2].id, "FLIY_ECOLI") self.assertEqual(alignment.sequences[3].id, "Deinococcus_radiodurans") self.assertEqual(alignment.sequences[4].id, "B_subtilis_GlnH_homo_YCKK") self.assertEqual(alignment.sequences[5].id, "YA80_HAEIN") self.assertEqual(alignment.sequences[6].id, "E_coli_GlnH") self.assertEqual(alignment.sequences[7].id, "HISJ_E_COLI") self.assertEqual( alignment.sequences[0].seq, "MKNWIKVAVAAIALSAATVQAATEVKVGMSGRYFPFTFVKQDKLQGFEVDMWDEIGKRNDYKIEYVTANFSGLFGLLETGRIDTISNQITMTDARKAKYLFADPYVVDGAQITVRKGNDSIQGVEDLAGKTVAVNLGSNFEQLLRDYDKDGKINIKTYDTGIEHDVALGRADAFIMDRLSALELIKKTGLPLQLAGEPFETIQNAWPFVDNEKGRKLQAEVNKALAEMRADGTVEKISVKWFGADITK", ) self.assertEqual( alignment.sequences[1].seq, "MKMKKWTVLVVAALLAVLSACGNGNSSSKEDDNVLHVGATGQSYPFAYKENGKLTGFDVEVMEAVAKKIDMKLDWKLLEFSGLMGELQTGKLDTISNQVAVTDERKETYNFTKPYAYAGTQIVVKKDNTDIKSVDDLKGKTVAAVLGSNHAKNLESKDPDKKINIKTYETQEGTLKDVAYGRVDAYVNSRTVLIAQIKKTGLPLKLAGDPIVYEQVAFPFAKDDAHDKLRKKVNKALDELRKDGTLKKLSEKYFNEDITVEQKH", ) self.assertEqual( alignment.sequences[2].seq, "MKLAHLGRQALMGVMAVALVAGMSVKSFADEGLLNKVKERGTLLVGLEGTYPPFSFQGDDGKLTGFEVEFAQQLAKHLGVEASLKPTKWDGMLASLDSKRIDVVINQVTISDERKKKYDFSTPYTISGIQALVKKGNEGTIKTADDLKGKKVGVGLGTNYEEWLRQNVQGVDVRTYDDDPTKYQDLRVGRIDAILVDRLAALDLVKKTNDTLAVTGEAFSRQESGVALRKGNEDLLKAVNDAIAEMQKDGTLQALSEKWFGADVTK", ) self.assertEqual( alignment.sequences[3].seq, "MKKSLLSLKLSGLLVPSVLALSLSACSSPSSTLNQGTLKIAMEGTYPPFTSKNEQGELVGFDVDIAKAVAQKLNLKPEFVLTEWSGILAGLQANKYDVIVNQVGITPERQNSIGFSQPYAYSRPEIIVAKNNTFNPQSLADLKGKRVGSTLGSNYEKQLIDTGDIKIVTYPGAPEILADLVAGRIDAAYNDRLVVNYIINDQKLPVRGAGQIGDAAPVGIALKKGNSALKDQIDKALTEMRSDGTFEKISQKWFGQDVGQP", ) self.assertEqual( alignment.sequences[4].seq, "MKKALLALFMVVSIAALAACGAGNDNQSKDNAKDGDLWASIKKKGVLTVGTEGTYEPFTYHDKDTDKLTGYDVEVITEVAKRLGLKVDFKETQWGSMFAGLNSKRFDVVANQVGKTDREDKYDFSDKYTTSRAVVVTKKDNNDIKSEADVKGKTSAQSLTSNYNKLATNAGAKVEGVEGMAQALQMIQQARVDMTYNDKLAVLNYLKTSGNKNVKIAFETGEPQSTYFTFRKGSGEVVDQVNKALKEMKEDGTLSKISKKWFGEDVSK", ) self.assertEqual( alignment.sequences[5].seq, "MKKLLFTTALLTGAIAFSTFSHAGEIADRVEKTKTLLVGTEGTYAPFTFHDKSGKLTGFDVEVIRKVAEKLGLKVEFKETQWDAMYAGLNAKRFDVIANQTNPSPERLKKYSFTTPYNYSGGVIVTKSSDNSIKSFEDLKGRKSAQSATSNWGKDAKAAGAQILVVDGLAQSLELIKQGRAEATINDKLAVLDYFKQHPNSGLKIAYDRGDKTPTAFAFLQGEDALITKFNQVLEALRQDGTLKQISIEWFGYDITQ", ) self.assertEqual( alignment.sequences[6].seq, "MKSVLKVSLAALTLAFAVSSHAADKKLVVATDTAFVPFEFKQGDKYVGFDVDLWAAIAKELKLDYELKPMDFSGIIPALQTKNVDLALAGITITDERKKAIDFSDGYYKSGLLVMVKANNNDVKSVKDLDGKVVAVKSGTGSVDYAKANIKTKDLRQFPNIDNAYMELGTNRADAVLHDTPNILYFIKTAGNGQFKAVGDSLEAQQYGIAFPKGSDELRDKVNGALKTLRENGTYNEIYKKWFGTEPK", ) self.assertEqual( alignment.sequences[7].seq, "MKKLVLSLSLVLAFSSATAAFAAIPQNIRIGTDPTYAPFESKNSQGELVGFDIDLAKELCKRINTQCTFVENPLDALIPSLKAKKIDAIMSSLSITEKRQQEIAFTDKLYAADSRLVVAKNSDIQPTVESLKGKRVGVLQGTTQETFGNEHWAPKGIEIVSYQGQDNIYSDLTAGRIDAAFQDEVAASEGFLKQPVGKDYKFGGPSVKDEKLFGVGTGMGLRKEDNELREALNKAFAEMRADGTYEKLAKKYFDFDVYGG", ) self.assertEqual( alignment[0], "MKNW--------IKV----AVAAI-A--LSAA-------------------TVQAATEVKVGMSGRYFPFTFVK--QDKLQGFEVDMWDEIGKRNDYKIEYVTANFSGLFGLLETGRIDTISNQITMTDARKAKYLFADPYVVDGAQITVRK-GNDSIQGVEDLAGKTVAVNLGSNFEQLLRDYDKDGKINIKTYDT--GIEHDVALGRADAFIMDRLSALE-LIKKTG-LPLQLAGEPFE-----TIQNAWPFVDNEKGRKLQAEVNKALAEMRADGTVEKISVKWFGADITK----", ) self.assertEqual( alignment[1], "MKMKKW------TVL----VVAALLA-VLSACGN------------G-NSSSKEDDNVLHVGATGQSYPFAYKE--NGKLTGFDVEVMEAVAKKIDMKLDWKLLEFSGLMGELQTGKLDTISNQVAVTDERKETYNFTKPYAYAGTQIVVKK-DNTDIKSVDDLKGKTVAAVLGSNHAKNLESKDPDKKINIKTYETQEGTLKDVAYGRVDAYVNSRTVLIA-QIKKTG-LPLKLAGDPIV-----YEQVAFPFAKDDAHDKLRKKVNKALDELRKDGTLKKLSEKYFNEDITVEQKH", ) self.assertEqual( alignment[2], "MKLAHLGRQALMGVM----AVALVAG--MSVKSF---------ADEG-LLNKVKERGTLLVGLEGTYPPFSFQGD-DGKLTGFEVEFAQQLAKHLGVEASLKPTKWDGMLASLDSKRIDVVINQVTISDERKKKYDFSTPYTISGIQALVKKGNEGTIKTADDLKGKKVGVGLGTNYEEWLRQN--VQGVDVRTYDDDPTKYQDLRVGRIDAILVDRLAALD-LVKKTN-DTLAVTGEAFS-----RQESGVALRK--GNEDLLKAVNDAIAEMQKDGTLQALSEKWFGADVTK----", ) self.assertEqual( alignment[3], "MKKSLL------SLKLSGLLVPSVLALSLSACSS---------------PSSTLNQGTLKIAMEGTYPPFTSKNE-QGELVGFDVDIAKAVAQKLNLKPEFVLTEWSGILAGLQANKYDVIVNQVGITPERQNSIGFSQPYAYSRPEIIVAKNNTFNPQSLADLKGKRVGSTLGSNYEKQLI-D--TGDIKIVTYPGAPEILADLVAGRIDAAYNDRLVVNY-IIND-QKLPVRGAGQIGD-----AAPVGIALKK--GNSALKDQIDKALTEMRSDGTFEKISQKWFGQDVGQ---P", ) self.assertEqual( alignment[4], "MKKALL------ALF----MVVSIAA--LAACGAGNDNQSKDNAKDGDLWASIKKKGVLTVGTEGTYEPFTYHDKDTDKLTGYDVEVITEVAKRLGLKVDFKETQWGSMFAGLNSKRFDVVANQVGKTD-REDKYDFSDKYTTSRAVVVTKK-DNNDIKSEADVKGKTSAQSLTSNYNKLAT-N--A-GAKVEGVEGMAQALQMIQQARVDMTYNDKLAVLN-YLKTSGNKNVKIAFETGE-----PQSTYFTFRK--GSGEVVDQVNKALKEMKEDGTLSKISKKWFGEDVSK----", ) self.assertEqual( alignment[5], "MKKLLF------TTA----LLTGAIA--FSTFS-----------HAGEIADRVEKTKTLLVGTEGTYAPFTFHDK-SGKLTGFDVEVIRKVAEKLGLKVEFKETQWDAMYAGLNAKRFDVIANQTNPSPERLKKYSFTTPYNYSGGVIVTKS-SDNSIKSFEDLKGRKSAQSATSNWGKDAK-A--A-GAQILVVDGLAQSLELIKQGRAEATINDKLAVLD-YFKQHPNSGLKIAYDRGD-----KTPTAFAFLQ--GEDALITKFNQVLEALRQDGTLKQISIEWFGYDITQ----", ) self.assertEqual( alignment[6], "MKSVL-------KVS----LAALTLA--FAVSSH---------A----------ADKKLVVATDTAFVPFEFKQ--GDKYVGFDVDLWAAIAKELKLDYELKPMDFSGIIPALQTKNVDLALAGITITDERKKAIDFSDGYYKSGLLVMVKAN-NNDVKSVKDLDGKVVAVKSGTGSVDYAKAN--IKTKDLRQFPNIDNAYMELGTNRADAVLHDTPNILY-FIKTAGNGQFKAVGDSLE-----AQQYGIAFPK--GSDELRDKVNGALKTLRENGTYNEIYKKWFGTEP-K----", ) self.assertEqual( alignment[7], "MKKLVL------SLS----LV---LA--FSSATA---------------A-FAAIPQNIRIGTDPTYAPFESKNS-QGELVGFDIDLAKELCKRINTQCTFVENPLDALIPSLKAKKIDAIMSSLSITEKRQQEIAFTDKLYAADSRLVVAK-NSDIQPTVESLKGKRVGVLQGTTQETFGNEHWAPKGIEIVSYQGQDNIYSDLTAGRIDAAFQDEVAASEGFLKQPVGKDYKFGGPSVKDEKLFGVGTGMGLRK--EDNELREALNKAFAEMRADGTYEKLAKKYFDFDVYG---G", ) self.assertEqual( alignment.column_annotations["state"], "DDDDIIIIIIIIDDDIIIIDDDDDIDIIDDDDIIIIIIIIIIIIIIIIIIIDDDDDDDDDDDDDDDDDDDDDDDIIDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDIDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDIIDDDDDDDDDDDDDDDDDDDDDDDIDDDDDDIDDDDDDDDDDDIIIIIDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDIIII", ) self.assertEqual( str(alignment), """\ V_Harveyi 0 MKNW--------IKV----AVAAI-A--LSAA-------------------TVQAATEVK B_subtili 0 MKMKKW------TVL----VVAALLA-VLSACGN------------G-NSSSKEDDNVLH FLIY_ECOL 0 MKLAHLGRQALMGVM----AVALVAG--MSVKSF---------ADEG-LLNKVKERGTLL Deinococc 0 MKKSLL------SLKLSGLLVPSVLALSLSACSS---------------PSSTLNQGTLK B_subtili 0 MKKALL------ALF----MVVSIAA--LAACGAGNDNQSKDNAKDGDLWASIKKKGVLT YA80_HAEI 0 MKKLLF------TTA----LLTGAIA--FSTFS-----------HAGEIADRVEKTKTLL E_coli_Gl 0 MKSVL-------KVS----LAALTLA--FAVSSH---------A----------ADKKLV HISJ_E_CO 0 MKKLVL------SLS----LV---LA--FSSATA---------------A-FAAIPQNIR V_Harveyi 26 VGMSGRYFPFTFVK--QDKLQGFEVDMWDEIGKRNDYKIEYVTANFSGLFGLLETGRIDT B_subtili 36 VGATGQSYPFAYKE--NGKLTGFDVEVMEAVAKKIDMKLDWKLLEFSGLMGELQTGKLDT FLIY_ECOL 44 VGLEGTYPPFSFQGD-DGKLTGFEVEFAQQLAKHLGVEASLKPTKWDGMLASLDSKRIDV Deinococc 39 IAMEGTYPPFTSKNE-QGELVGFDVDIAKAVAQKLNLKPEFVLTEWSGILAGLQANKYDV B_subtili 48 VGTEGTYEPFTYHDKDTDKLTGYDVEVITEVAKRLGLKVDFKETQWGSMFAGLNSKRFDV YA80_HAEI 37 VGTEGTYAPFTFHDK-SGKLTGFDVEVIRKVAEKLGLKVEFKETQWDAMYAGLNAKRFDV E_coli_Gl 28 VATDTAFVPFEFKQ--GDKYVGFDVDLWAAIAKELKLDYELKPMDFSGIIPALQTKNVDL HISJ_E_CO 29 IGTDPTYAPFESKNS-QGELVGFDIDLAKELCKRINTQCTFVENPLDALIPSLKAKKIDA V_Harveyi 84 ISNQITMTDARKAKYLFADPYVVDGAQITVRK-GNDSIQGVEDLAGKTVAVNLGSNFEQL B_subtili 94 ISNQVAVTDERKETYNFTKPYAYAGTQIVVKK-DNTDIKSVDDLKGKTVAAVLGSNHAKN FLIY_ECOL 103 VINQVTISDERKKKYDFSTPYTISGIQALVKKGNEGTIKTADDLKGKKVGVGLGTNYEEW Deinococc 98 IVNQVGITPERQNSIGFSQPYAYSRPEIIVAKNNTFNPQSLADLKGKRVGSTLGSNYEKQ B_subtili 108 VANQVGKTD-REDKYDFSDKYTTSRAVVVTKK-DNNDIKSEADVKGKTSAQSLTSNYNKL YA80_HAEI 96 IANQTNPSPERLKKYSFTTPYNYSGGVIVTKS-SDNSIKSFEDLKGRKSAQSATSNWGKD E_coli_Gl 86 ALAGITITDERKKAIDFSDGYYKSGLLVMVKAN-NNDVKSVKDLDGKVVAVKSGTGSVDY HISJ_E_CO 88 IMSSLSITEKRQQEIAFTDKLYAADSRLVVAK-NSDIQPTVESLKGKRVGVLQGTTQETF V_Harveyi 143 LRDYDKDGKINIKTYDT--GIEHDVALGRADAFIMDRLSALE-LIKKTG-LPLQLAGEPF B_subtili 153 LESKDPDKKINIKTYETQEGTLKDVAYGRVDAYVNSRTVLIA-QIKKTG-LPLKLAGDPI FLIY_ECOL 163 LRQN--VQGVDVRTYDDDPTKYQDLRVGRIDAILVDRLAALD-LVKKTN-DTLAVTGEAF Deinococc 158 LI-D--TGDIKIVTYPGAPEILADLVAGRIDAAYNDRLVVNY-IIND-QKLPVRGAGQIG B_subtili 166 AT-N--A-GAKVEGVEGMAQALQMIQQARVDMTYNDKLAVLN-YLKTSGNKNVKIAFETG YA80_HAEI 155 AK-A--A-GAQILVVDGLAQSLELIKQGRAEATINDKLAVLD-YFKQHPNSGLKIAYDRG E_coli_Gl 145 AKAN--IKTKDLRQFPNIDNAYMELGTNRADAVLHDTPNILY-FIKTAGNGQFKAVGDSL HISJ_E_CO 147 GNEHWAPKGIEIVSYQGQDNIYSDLTAGRIDAAFQDEVAASEGFLKQPVGKDYKFGGPSV V_Harveyi 199 E-----TIQNAWPFVDNEKGRKLQAEVNKALAEMRADGTVEKISVKWFGADITK---- B_subtili 211 V-----YEQVAFPFAKDDAHDKLRKKVNKALDELRKDGTLKKLSEKYFNEDITVEQKH FLIY_ECOL 219 S-----RQESGVALRK--GNEDLLKAVNDAIAEMQKDGTLQALSEKWFGADVTK---- Deinococc 213 D-----AAPVGIALKK--GNSALKDQIDKALTEMRSDGTFEKISQKWFGQDVGQ---P B_subtili 221 E-----PQSTYFTFRK--GSGEVVDQVNKALKEMKEDGTLSKISKKWFGEDVSK---- YA80_HAEI 210 D-----KTPTAFAFLQ--GEDALITKFNQVLEALRQDGTLKQISIEWFGYDITQ---- E_coli_Gl 202 E-----AQQYGIAFPK--GSDELRDKVNGALKTLRENGTYNEIYKKWFGTEP-K---- HISJ_E_CO 207 KDEKLFGVGTGMGLRK--EDNELREALNKAFAEMRADGTYEKLAKKYFDFDVYG---G V_Harveyi 248 B_subtili 264 FLIY_ECOL 266 Deinococc 261 B_subtili 268 YA80_HAEI 257 E_coli_Gl 248 HISJ_E_CO 260 """, ) self.assertEqual( format(alignment, "a2m"), """\ >V_Harveyi_PATH MKNW........IKV....AVAAI.A..LSAA...................TVQAATEVKVGMSGRYFPFTFVK..QDKLQGFEVDMWDEIGKRNDYKIEYVTANFSGLFGLLETGRIDTISNQITMTDARKAKYLFADPYVVDGAQITVRK.GNDSIQGVEDLAGKTVAVNLGSNFEQLLRDYDKDGKINIKTYDT..GIEHDVALGRADAFIMDRLSALE.LIKKTG.LPLQLAGEPFE.....TIQNAWPFVDNEKGRKLQAEVNKALAEMRADGTVEKISVKWFGADITK.... >B_subtilis_YXEM MKMKkw......TVL....VVAALlA.vLSACgn............g.nssSKEDDNVLHVGATGQSYPFAYKE..NGKLTGFDVEVMEAVAKKIDMKLDWKLLEFSGLMGELQTGKLDTISNQVAVTDERKETYNFTKPYAYAGTQIVVKK.DNTDIKSVDDLKGKTVAAVLGSNHAKNLESKDPDKKINIKTYETqeGTLKDVAYGRVDAYVNSRTVLIA.QIKKTG.LPLKLAGDPIV.....YEQVAFPFAKDDAHDKLRKKVNKALDELRKDGTLKKLSEKYFNEDITVeqkh >FLIY_ECOLI MKLAhlgrqalmGVM....AVALVaG..MSVKsf.........adeg.llnKVKERGTLLVGLEGTYPPFSFQGd.DGKLTGFEVEFAQQLAKHLGVEASLKPTKWDGMLASLDSKRIDVVINQVTISDERKKKYDFSTPYTISGIQALVKKgNEGTIKTADDLKGKKVGVGLGTNYEEWLRQN--VQGVDVRTYDDdpTKYQDLRVGRIDAILVDRLAALD.LVKKTN.DTLAVTGEAFS.....RQESGVALRK--GNEDLLKAVNDAIAEMQKDGTLQALSEKWFGADVTK.... >Deinococcus_radiodurans MKKSll......SLKlsglLVPSVlAlsLSACss...............psSTLNQGTLKIAMEGTYPPFTSKNe.QGELVGFDVDIAKAVAQKLNLKPEFVLTEWSGILAGLQANKYDVIVNQVGITPERQNSIGFSQPYAYSRPEIIVAKnNTFNPQSLADLKGKRVGSTLGSNYEKQLI-D--TGDIKIVTYPGapEILADLVAGRIDAAYNDRLVVNY.IIND-QkLPVRGAGQIGD.....AAPVGIALKK--GNSALKDQIDKALTEMRSDGTFEKISQKWFGQDVGQ...p >B_subtilis_GlnH_homo_YCKK MKKAll......ALF....MVVSIaA..LAACgagndnqskdnakdgdlwaSIKKKGVLTVGTEGTYEPFTYHDkdTDKLTGYDVEVITEVAKRLGLKVDFKETQWGSMFAGLNSKRFDVVANQVGKTD-REDKYDFSDKYTTSRAVVVTKK.DNNDIKSEADVKGKTSAQSLTSNYNKLAT-N--A-GAKVEGVEGmaQALQMIQQARVDMTYNDKLAVLN.YLKTSGnKNVKIAFETGE.....PQSTYFTFRK--GSGEVVDQVNKALKEMKEDGTLSKISKKWFGEDVSK.... >YA80_HAEIN MKKLlf......TTA....LLTGAiA..FSTFs...........hageiadRVEKTKTLLVGTEGTYAPFTFHDk.SGKLTGFDVEVIRKVAEKLGLKVEFKETQWDAMYAGLNAKRFDVIANQTNPSPERLKKYSFTTPYNYSGGVIVTKS.SDNSIKSFEDLKGRKSAQSATSNWGKDAK-A--A-GAQILVVDGlaQSLELIKQGRAEATINDKLAVLD.YFKQHPnSGLKIAYDRGD.....KTPTAFAFLQ--GEDALITKFNQVLEALRQDGTLKQISIEWFGYDITQ.... >E_coli_GlnH MKSVl.......KVS....LAALTlA..FAVSsh.........a.......---ADKKLVVATDTAFVPFEFKQ..GDKYVGFDVDLWAAIAKELKLDYELKPMDFSGIIPALQTKNVDLALAGITITDERKKAIDFSDGYYKSGLLVMVKAn-NNDVKSVKDLDGKVVAVKSGTGSVDYAKAN--IKTKDLRQFPNidNAYMELGTNRADAVLHDTPNILY.FIKTAGnGQFKAVGDSLE.....AQQYGIAFPK--GSDELRDKVNGALKTLRENGTYNEIYKKWFGTEP-K.... >HISJ_E_COLI MKKLvl......SLS....LV---lA..FSSAta...............a.FAAIPQNIRIGTDPTYAPFESKNs.QGELVGFDIDLAKELCKRINTQCTFVENPLDALIPSLKAKKIDAIMSSLSITEKRQQEIAFTDKLYAADSRLVVAK.NSDIQPTVESLKGKRVGVLQGTTQETFGNEHWAPKGIEIVSYQGqdNIYSDLTAGRIDAAFQDEVAASEgFLKQPVgKDYKFGGPSVKdeklfGVGTGMGLRK--EDNELREALNKAFAEMRADGTYEKLAKKYFDFDVYG...g """, ) counts = alignment.counts(substitution_matrix) self.assertEqual( repr(counts), "" % id(counts), ) self.assertEqual( str(counts), """\ AlignmentCounts object with substitution_score = 10664.0, aligned = 6948: identities = 2353, positives = 3745, mismatches = 4595. gaps = 608: left_gaps = 0: left_insertions = 0: open_left_insertions = 0, extend_left_insertions = 0; left_deletions = 0: open_left_deletions = 0, extend_left_deletions = 0; internal_gaps = 578: internal_insertions = 285: open_internal_insertions = 126, extend_internal_insertions = 159; internal_deletions = 293: open_internal_deletions = 125, extend_internal_deletions = 168; right_gaps = 30: right_insertions = 11: open_right_insertions = 8, extend_right_insertions = 3; right_deletions = 19: open_right_deletions = 7, extend_right_deletions = 12. """, ) self.assertEqual(counts.left_insertions, 0) self.assertEqual(counts.left_deletions, 0) self.assertEqual(counts.right_insertions, 11) self.assertEqual(counts.right_deletions, 19) self.assertEqual(counts.internal_insertions, 285) self.assertEqual(counts.internal_deletions, 293) self.assertEqual(counts.left_gaps, 0) self.assertEqual(counts.right_gaps, 30) self.assertEqual(counts.internal_gaps, 578) self.assertEqual(counts.insertions, 296) self.assertEqual(counts.deletions, 312) self.assertEqual(counts.gaps, 608) self.assertEqual(counts.aligned, 6948) self.assertEqual(counts.identities, 2353) self.assertEqual(counts.mismatches, 4595) self.assertEqual(counts.positives, 3745) self.check_reading_writing(path) def test_muscle(self): path = "Clustalw/muscle.a2m" with open(path) as stream: alignments = Align.parse(stream, "a2m") alignment = next(alignments) with self.assertRaises(StopIteration): next(alignments) self.assertEqual( repr(alignment), "" % id(alignment), ) self.assertEqual(len(alignment), 3) self.assertEqual(alignment.sequences[0].id, "Test1seq") self.assertEqual(alignment.sequences[1].id, "AT3G20900.1-SEQ") self.assertEqual(alignment.sequences[2].id, "AT3G20900.1-CDS") self.assertEqual( alignment.sequences[0].seq, "AGTTACAATAACTGACGAAGCTAAGTAGGCTACTAATTAACGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGTAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATCGTATTCCGGTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAATAAATTAGCGCCAAAATAATGAAAAAAATAATAACAAACAAAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGCTGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGATCCTATATCAACGTAAACAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTTCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACGGTCGCTAGAGAAACTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCGTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTT", ) self.assertEqual( alignment.sequences[1].seq, "ATGAACAAAGTAGCGAGGAAGAACAAAACATCAGGTGAACAAAAAAAAAACTCAATCCACATCAAAGTTACAATAACTGACGAAGCTAAGTAGGCTAGAAATTAAAGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGCAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATAGTTGATTTTTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAAAAAACAAGCGCCAAAATAATGAAAAAAATAATAACAAAAACAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGATGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGATCCTATATCAAAAAAAAAAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTGCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAG", ) self.assertEqual( alignment.sequences[2].seq, "ATGAACAAAGTAGCGAGGAAGAACAAAACATCAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAG", ) self.assertEqual( alignment[0], "-----------------------------------------------------------------AGTTACAATAACTGACGAAGCTAAGTAGGCTACTAATTAACGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGTAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATCGTATTCCGGTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAATAAATTAGCGCCAAAATAATGAAAAAAATAATAACAAACAAAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGCTGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGATCCTATATCAACGTAAACAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTTCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACGGTCGCTAGAGAAACTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCGTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTT-", ) self.assertEqual( alignment[1], "ATGAACAAAGTAGCGAGGAAGAACAAAACATCAGGTGAACAAAAAAAAAACTCAATCCACATCAAAGTTACAATAACTGACGAAGCTAAGTAGGCTAGAAATTAAAGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGCAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATAGTTGATTTTTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAAAAAACAAGCGCCAAAATAATGAAAAAAATAATAACAAAAACAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGATGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGATCCTATATCAAAAAAAAAAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTGCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAG", ) self.assertEqual( alignment[2], "--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ATGAACAAAGTAGCGAGGAAGAA------------------------------CAAAACATC----------------------------------------------------------------------------------------------------------------------------------------------------------------------------AGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAG", ) self.assertEqual( alignment.column_annotations["state"], "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDI", ) self.assertEqual( str(alignment), """\ Test1seq 0 ------------------------------------------------------------ AT3G20900 0 ATGAACAAAGTAGCGAGGAAGAACAAAACATCAGGTGAACAAAAAAAAAACTCAATCCAC AT3G20900 0 ------------------------------------------------------------ Test1seq 0 -----AGTTACAATAACTGACGAAGCTAAGTAGGCTACTAATTAACGTCATCAACCTAAT AT3G20900 60 ATCAAAGTTACAATAACTGACGAAGCTAAGTAGGCTAGAAATTAAAGTCATCAACCTAAT AT3G20900 0 ------------------------------------------------------------ Test1seq 55 ACATAGCACTTAGAAAAAAGTGAAGTAAGAAAATATAAAATAATAAAAGGGTGGGTTATC AT3G20900 120 ACATAGCACTTAGAAAAAAGTGAAGCAAGAAAATATAAAATAATAAAAGGGTGGGTTATC AT3G20900 0 ------------------------------------------------------------ Test1seq 115 AATTGATAGTGTAAATCATCGTATTCCGGTGATATACCCTACCACAAAAACTCAAACCGA AT3G20900 180 AATTGATAGTGTAAATCATAGTTGATTTTTGATATACCCTACCACAAAAACTCAAACCGA AT3G20900 0 ------------------------------------------------------------ Test1seq 175 CTTGATTCAAATCATCTCAATAAATTAGCGCCAAAATAATGAAAAAAATAATAACAAACA AT3G20900 240 CTTGATTCAAATCATCTCAAAAAACAAGCGCCAAAATAATGAAAAAAATAATAACAAAAA AT3G20900 0 --------------------------------------ATGAACAAAGTAGCGAGGAAGA Test1seq 235 AAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATT AT3G20900 300 CAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATT AT3G20900 22 A------------------------------CAAAACATC-------------------- Test1seq 295 GTATTAACAAATCAAAGAGCTGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGAT AT3G20900 360 GTATTAACAAATCAAAGAGATGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGAT AT3G20900 32 ------------------------------------------------------------ Test1seq 355 CCTATATCAACGTAAACAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGT AT3G20900 420 CCTATATCAAAAAAAAAAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGT AT3G20900 32 ------------------------------------------------------------ Test1seq 415 TCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGT AT3G20900 480 GCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGT AT3G20900 32 --------------------------------AGCAAAGAAAACGATCTGTCTCCGTCGT Test1seq 475 AACACACGGTCGCTAGAGAAACTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCC AT3G20900 540 AACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCC AT3G20900 60 AACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCC Test1seq 535 GGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCGTGGTGACGTCAGCACCGCT AT3G20900 600 GGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCT AT3G20900 120 GGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCT Test1seq 595 GCTGGGGATGGAGAGGGAACAGAGTT- 621 AT3G20900 660 GCTGGGGATGGAGAGGGAACAGAGTAG 687 AT3G20900 180 GCTGGGGATGGAGAGGGAACAGAGTAG 207 """, ) self.assertEqual( format(alignment, "a2m"), """\ >Test1seq .................................................................AGTTACAATAACTGACGAAGCTAAGTAGGCTACTAATTAACGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGTAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATCGTATTCCGGTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAATAAATTAGCGCCAAAATAATGAAAAAAATAATAACAAACAAAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGCTGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGATCCTATATCAACGTAAACAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTTCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACGGTCGCTAGAGAAACTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCGTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTT. >AT3G20900.1-SEQ atgaacaaagtagcgaggaagaacaaaacatcaggtgaacaaaaaaaaaactcaatccacatcaaAGTTACAATAACTGACGAAGCTAAGTAGGCTAGAAATTAAAGTCATCAACCTAATACATAGCACTTAGAAAAAAGTGAAGCAAGAAAATATAAAATAATAAAAGGGTGGGTTATCAATTGATAGTGTAAATCATAGTTGATTTTTGATATACCCTACCACAAAAACTCAAACCGACTTGATTCAAATCATCTCAAAAAACAAGCGCCAAAATAATGAAAAAAATAATAACAAAAACAAACAAACCAAAATAAGAAAAAACATTACGCAAAACATAATAATTTACTCTTCGTTATTGTATTAACAAATCAAAGAGATGAATTTTGATCACCTGCTAATACTACTTTCTGTATTGATCCTATATCAAAAAAAAAAAAGATACTAATAATTAACTAAAAGTACGTTCATCGATCGTGTGCGTTGACGAAGAAGAGCTCTATCTCCGGCGGAGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAg >AT3G20900.1-CDS .................................................................---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ATGAACAAAGTAGCGAGGAAGAA------------------------------CAAAACATC----------------------------------------------------------------------------------------------------------------------------------------------------------------------------AGCAAAGAAAACGATCTGTCTCCGTCGTAACACACAGTTTTTCGAGACCCTTTGCTTCTTCGGCGCCGGTGGACACGTCAGCATCTCCGGTATCCTAGACTTCTTGGCTTTCGGGGTACAACAACCGCCTGGTGACGTCAGCACCGCTGCTGGGGATGGAGAGGGAACAGAGTAg """, ) counts = alignment.counts() self.assertEqual( repr(counts), "" % id(counts), ) self.assertEqual( str(counts), """\ AlignmentCounts object with aligned = 1034: identities = 974, mismatches = 60. gaps = 962: left_gaps = 556: left_insertions = 65: open_left_insertions = 1, extend_left_insertions = 64; left_deletions = 491: open_left_deletions = 2, extend_left_deletions = 489; internal_gaps = 404: internal_insertions = 0: open_internal_insertions = 0, extend_internal_insertions = 0; internal_deletions = 404: open_internal_deletions = 4, extend_internal_deletions = 400; right_gaps = 2: right_insertions = 2: open_right_insertions = 2, extend_right_insertions = 0; right_deletions = 0: open_right_deletions = 0, extend_right_deletions = 0. """, ) self.assertEqual(counts.left_insertions, 65) self.assertEqual(counts.left_deletions, 491) self.assertEqual(counts.right_insertions, 2) self.assertEqual(counts.right_deletions, 0) self.assertEqual(counts.internal_insertions, 0) self.assertEqual(counts.internal_deletions, 404) self.assertEqual(counts.left_gaps, 556) self.assertEqual(counts.right_gaps, 2) self.assertEqual(counts.internal_gaps, 404) self.assertEqual(counts.insertions, 67) self.assertEqual(counts.deletions, 895) self.assertEqual(counts.gaps, 962) self.assertEqual(counts.aligned, 1034) self.assertEqual(counts.identities, 974) self.assertEqual(counts.mismatches, 60) self.check_reading_writing(path) def test_kalign(self): path = "Clustalw/kalign.a2m" with open(path) as stream: alignments = Align.parse(stream, "a2m") alignment = next(alignments) with self.assertRaises(StopIteration): next(alignments) self.assertEqual( repr(alignment), "" % id(alignment), ) self.assertEqual(len(alignment), 2) self.assertEqual(alignment.sequences[0].id, "Test1seq") self.assertEqual(alignment.sequences[1].id, "AT3G20900") self.assertEqual(alignment.sequences[0].seq, "GCTGGGGATGGAGAGGGAACAGAGTT") self.assertEqual(alignment.sequences[1].seq, "GCTGGGGATGGAGAGGGAACAGAGTAG") self.assertEqual(alignment[0], "GCTGGGGATGGAGAGGGAACAGAGT-T") self.assertEqual(alignment[1], "GCTGGGGATGGAGAGGGAACAGAGTAG") self.assertEqual( alignment.column_annotations["state"], "DDDDDDDDDDDDDDDDDDDDDDDDDID", ) self.assertEqual( str(alignment), """\ Test1seq 0 GCTGGGGATGGAGAGGGAACAGAGT-T 26 0 |||||||||||||||||||||||||-. 27 AT3G20900 0 GCTGGGGATGGAGAGGGAACAGAGTAG 27 """, ) self.assertEqual( format(alignment, "a2m"), """\ >Test1seq GCTGGGGATGGAGAGGGAACAGAGT.T >AT3G20900 GCTGGGGATGGAGAGGGAACAGAGTaG """, ) self.assertTrue( np.array_equal( np.array(alignment, "U"), # fmt: off np.array([['G', 'C', 'T', 'G', 'G', 'G', 'G', 'A', 'T', 'G', 'G', 'A', 'G', 'A', 'G', 'G', 'G', 'A', 'A', 'C', 'A', 'G', 'A', 'G', 'T', '-', 'T'], ['G', 'C', 'T', 'G', 'G', 'G', 'G', 'A', 'T', 'G', 'G', 'A', 'G', 'A', 'G', 'G', 'G', 'A', 'A', 'C', 'A', 'G', 'A', 'G', 'T', 'A', 'G'] ], dtype='U') # fmt: on ) ) counts = alignment.counts() self.assertEqual( repr(counts), "" % id(counts), ) self.assertEqual( str(counts), """\ AlignmentCounts object with aligned = 26: identities = 25, mismatches = 1. gaps = 1: left_gaps = 0: left_insertions = 0: open_left_insertions = 0, extend_left_insertions = 0; left_deletions = 0: open_left_deletions = 0, extend_left_deletions = 0; internal_gaps = 1: internal_insertions = 1: open_internal_insertions = 1, extend_internal_insertions = 0; internal_deletions = 0: open_internal_deletions = 0, extend_internal_deletions = 0; right_gaps = 0: right_insertions = 0: open_right_insertions = 0, extend_right_insertions = 0; right_deletions = 0: open_right_deletions = 0, extend_right_deletions = 0. """, ) self.assertEqual(counts.left_insertions, 0) self.assertEqual(counts.left_deletions, 0) self.assertEqual(counts.right_insertions, 0) self.assertEqual(counts.right_deletions, 0) self.assertEqual(counts.internal_insertions, 1) self.assertEqual(counts.internal_deletions, 0) self.assertEqual(counts.left_gaps, 0) self.assertEqual(counts.right_gaps, 0) self.assertEqual(counts.internal_gaps, 1) self.assertEqual(counts.insertions, 1) self.assertEqual(counts.deletions, 0) self.assertEqual(counts.gaps, 1) self.assertEqual(counts.aligned, 26) self.assertEqual(counts.identities, 25) self.assertEqual(counts.mismatches, 1) self.check_reading_writing(path) def test_probcons(self): path = "Clustalw/probcons.a2m" # example taken from the PROBCONS documentation, # and converted to aligned A2M format. with open(path) as stream: alignments = Align.parse(stream, "a2m") alignment = next(alignments) with self.assertRaises(StopIteration): next(alignments) self.assertEqual( repr(alignment), "" % id(alignment), ) self.assertEqual(len(alignment), 5) self.assertEqual(alignment.sequences[0].id, "plas_horvu") self.assertEqual(alignment.sequences[1].id, "plas_chlre") self.assertEqual(alignment.sequences[2].id, "plas_anava") self.assertEqual(alignment.sequences[3].id, "plas_proho") self.assertEqual(alignment.sequences[4].id, "azup_achcy") self.assertEqual( alignment.sequences[0].seq, "DVLLGANGGVLVFEPNDFSVKAGETITFKNNAGYPHNVVFDEDAVPSGVDVSKISQEEYLTAPGETFSVTLTVPGTYGFYCEPHAGAGMVGKVTV", ) self.assertEqual( alignment.sequences[1].seq, "VKLGADSGALEFVPKTLTIKSGETVNFVNNAGFPHNIVFDEDAIPSGVNADAISRDDYLNAPGETYSVKLTAAGEYGYYCEPHQGAGMVGKIIV", ) self.assertEqual( alignment.sequences[2].seq, "VKLGSDKGLLVFEPAKLTIKPGDTVEFLNNKVPPHNVVFDAALNPAKSADLAKSLSHKQLLMSPGQSTSTTFPADAPAGEYTFYCEPHRGAGMVGKITV", ) self.assertEqual( alignment.sequences[3].seq, "VQIKMGTDKYAPLYEPKALSISAGDTVEFVMNKVGPHNVIFDKVPAGESAPALSNTKLRIAPGSFYSVTLGTPGTYSFYCTPHRGAGMVGTITV", ) self.assertEqual( alignment.sequences[4].seq, "VHMLNKGKDGAMVFEPASLKVAPGDTVTFIPTDKGHNVETIKGMIPDGAEAFKSKINENYKVTFTAPGVYGVKCTPHYGMGMVGVVEV", ) self.assertEqual( alignment[0], "D-VLLGANGGVLVFEPNDFSVKAGETITFKNNAGYPHNVVFDEDAVPSG-VD-VSKISQEEYLTAPGETFSVTLTV---PGTYGFYCEPHAGAGMVGKVTV", ) self.assertEqual( alignment[1], "--VKLGADSGALEFVPKTLTIKSGETVNFVNNAGFPHNIVFDEDAIPSG-VN-ADAISRDDYLNAPGETYSVKLTA---AGEYGYYCEPHQGAGMVGKIIV", ) self.assertEqual( alignment[2], "--VKLGSDKGLLVFEPAKLTIKPGDTVEFLNNKVPPHNVVFDAALNPAKSADLAKSLSHKQLLMSPGQSTSTTFPADAPAGEYTFYCEPHRGAGMVGKITV", ) self.assertEqual( alignment[3], "VQIKMGTDKYAPLYEPKALSISAGDTVEFVMNKVGPHNVIFDK--VPAG-ES-APALSNTKLRIAPGSFYSVTLGT---PGTYSFYCTPHRGAGMVGTITV", ) self.assertEqual( alignment[4], "VHMLNKGKDGAMVFEPASLKVAPGDTVTFIPTDK-GHNVETIKGMIPDG-AE-A-------FKSKINENYKVTFTA---PGVYGVKCTPHYGMGMVGVVEV", ) self.assertEqual( alignment.column_annotations["state"], "DIDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDIDDIDDDDDDDDDDDDDDDDDDDDDDDIIIDDDDDDDDDDDDDDDDDDDDDD", ) self.assertEqual( str(alignment), """\ plas_horv 0 D-VLLGANGGVLVFEPNDFSVKAGETITFKNNAGYPHNVVFDEDAVPSG-VD-VSKISQE plas_chlr 0 --VKLGADSGALEFVPKTLTIKSGETVNFVNNAGFPHNIVFDEDAIPSG-VN-ADAISRD plas_anav 0 --VKLGSDKGLLVFEPAKLTIKPGDTVEFLNNKVPPHNVVFDAALNPAKSADLAKSLSHK plas_proh 0 VQIKMGTDKYAPLYEPKALSISAGDTVEFVMNKVGPHNVIFDK--VPAG-ES-APALSNT azup_achc 0 VHMLNKGKDGAMVFEPASLKVAPGDTVTFIPTDK-GHNVETIKGMIPDG-AE-A------ plas_horv 57 EYLTAPGETFSVTLTV---PGTYGFYCEPHAGAGMVGKVTV 95 plas_chlr 56 DYLNAPGETYSVKLTA---AGEYGYYCEPHQGAGMVGKIIV 94 plas_anav 58 QLLMSPGQSTSTTFPADAPAGEYTFYCEPHRGAGMVGKITV 99 plas_proh 56 KLRIAPGSFYSVTLGT---PGTYSFYCTPHRGAGMVGTITV 94 azup_achc 51 -FKSKINENYKVTFTA---PGVYGVKCTPHYGMGMVGVVEV 88 """, ) self.assertEqual( format(alignment, "a2m"), """\ >plas_horvu D.VLLGANGGVLVFEPNDFSVKAGETITFKNNAGYPHNVVFDEDAVPSG.VD.VSKISQEEYLTAPGETFSVTLTV...PGTYGFYCEPHAGAGMVGKVTV >plas_chlre -.VKLGADSGALEFVPKTLTIKSGETVNFVNNAGFPHNIVFDEDAIPSG.VN.ADAISRDDYLNAPGETYSVKLTA...AGEYGYYCEPHQGAGMVGKIIV >plas_anava -.VKLGSDKGLLVFEPAKLTIKPGDTVEFLNNKVPPHNVVFDAALNPAKsADlAKSLSHKQLLMSPGQSTSTTFPAdapAGEYTFYCEPHRGAGMVGKITV >plas_proho VqIKMGTDKYAPLYEPKALSISAGDTVEFVMNKVGPHNVIFDK--VPAG.ES.APALSNTKLRIAPGSFYSVTLGT...PGTYSFYCTPHRGAGMVGTITV >azup_achcy VhMLNKGKDGAMVFEPASLKVAPGDTVTFIPTDK-GHNVETIKGMIPDG.AE.A-------FKSKINENYKVTFTA...PGVYGVKCTPHYGMGMVGVVEV """, ) self.assertTrue( np.array_equal( np.array(alignment, "U"), # fmt: off np.array([['D', '-', 'V', 'L', 'L', 'G', 'A', 'N', 'G', 'G', 'V', 'L', 'V', 'F', 'E', 'P', 'N', 'D', 'F', 'S', 'V', 'K', 'A', 'G', 'E', 'T', 'I', 'T', 'F', 'K', 'N', 'N', 'A', 'G', 'Y', 'P', 'H', 'N', 'V', 'V', 'F', 'D', 'E', 'D', 'A', 'V', 'P', 'S', 'G', '-', 'V', 'D', '-', 'V', 'S', 'K', 'I', 'S', 'Q', 'E', 'E', 'Y', 'L', 'T', 'A', 'P', 'G', 'E', 'T', 'F', 'S', 'V', 'T', 'L', 'T', 'V', '-', '-', '-', 'P', 'G', 'T', 'Y', 'G', 'F', 'Y', 'C', 'E', 'P', 'H', 'A', 'G', 'A', 'G', 'M', 'V', 'G', 'K', 'V', 'T', 'V'], ['-', '-', 'V', 'K', 'L', 'G', 'A', 'D', 'S', 'G', 'A', 'L', 'E', 'F', 'V', 'P', 'K', 'T', 'L', 'T', 'I', 'K', 'S', 'G', 'E', 'T', 'V', 'N', 'F', 'V', 'N', 'N', 'A', 'G', 'F', 'P', 'H', 'N', 'I', 'V', 'F', 'D', 'E', 'D', 'A', 'I', 'P', 'S', 'G', '-', 'V', 'N', '-', 'A', 'D', 'A', 'I', 'S', 'R', 'D', 'D', 'Y', 'L', 'N', 'A', 'P', 'G', 'E', 'T', 'Y', 'S', 'V', 'K', 'L', 'T', 'A', '-', '-', '-', 'A', 'G', 'E', 'Y', 'G', 'Y', 'Y', 'C', 'E', 'P', 'H', 'Q', 'G', 'A', 'G', 'M', 'V', 'G', 'K', 'I', 'I', 'V'], ['-', '-', 'V', 'K', 'L', 'G', 'S', 'D', 'K', 'G', 'L', 'L', 'V', 'F', 'E', 'P', 'A', 'K', 'L', 'T', 'I', 'K', 'P', 'G', 'D', 'T', 'V', 'E', 'F', 'L', 'N', 'N', 'K', 'V', 'P', 'P', 'H', 'N', 'V', 'V', 'F', 'D', 'A', 'A', 'L', 'N', 'P', 'A', 'K', 'S', 'A', 'D', 'L', 'A', 'K', 'S', 'L', 'S', 'H', 'K', 'Q', 'L', 'L', 'M', 'S', 'P', 'G', 'Q', 'S', 'T', 'S', 'T', 'T', 'F', 'P', 'A', 'D', 'A', 'P', 'A', 'G', 'E', 'Y', 'T', 'F', 'Y', 'C', 'E', 'P', 'H', 'R', 'G', 'A', 'G', 'M', 'V', 'G', 'K', 'I', 'T', 'V'], ['V', 'Q', 'I', 'K', 'M', 'G', 'T', 'D', 'K', 'Y', 'A', 'P', 'L', 'Y', 'E', 'P', 'K', 'A', 'L', 'S', 'I', 'S', 'A', 'G', 'D', 'T', 'V', 'E', 'F', 'V', 'M', 'N', 'K', 'V', 'G', 'P', 'H', 'N', 'V', 'I', 'F', 'D', 'K', '-', '-', 'V', 'P', 'A', 'G', '-', 'E', 'S', '-', 'A', 'P', 'A', 'L', 'S', 'N', 'T', 'K', 'L', 'R', 'I', 'A', 'P', 'G', 'S', 'F', 'Y', 'S', 'V', 'T', 'L', 'G', 'T', '-', '-', '-', 'P', 'G', 'T', 'Y', 'S', 'F', 'Y', 'C', 'T', 'P', 'H', 'R', 'G', 'A', 'G', 'M', 'V', 'G', 'T', 'I', 'T', 'V'], ['V', 'H', 'M', 'L', 'N', 'K', 'G', 'K', 'D', 'G', 'A', 'M', 'V', 'F', 'E', 'P', 'A', 'S', 'L', 'K', 'V', 'A', 'P', 'G', 'D', 'T', 'V', 'T', 'F', 'I', 'P', 'T', 'D', 'K', '-', 'G', 'H', 'N', 'V', 'E', 'T', 'I', 'K', 'G', 'M', 'I', 'P', 'D', 'G', '-', 'A', 'E', '-', 'A', '-', '-', '-', '-', '-', '-', '-', 'F', 'K', 'S', 'K', 'I', 'N', 'E', 'N', 'Y', 'K', 'V', 'T', 'F', 'T', 'A', '-', '-', '-', 'P', 'G', 'V', 'Y', 'G', 'V', 'K', 'C', 'T', 'P', 'H', 'Y', 'G', 'M', 'G', 'M', 'V', 'G', 'V', 'V', 'E', 'V']], dtype='U') # fmt: on ) ) counts = alignment.counts(substitution_matrix) self.assertEqual( repr(counts), "" % id(counts), ) self.assertEqual( str(counts), """\ AlignmentCounts object with substitution_score = 2197.0, aligned = 904: identities = 427, positives = 554, mismatches = 477. gaps = 72: left_gaps = 10: left_insertions = 8: open_left_insertions = 4, extend_left_insertions = 4; left_deletions = 2: open_left_deletions = 2, extend_left_deletions = 0; internal_gaps = 62: internal_insertions = 14: open_internal_insertions = 9, extend_internal_insertions = 5; internal_deletions = 48: open_internal_deletions = 17, extend_internal_deletions = 31; right_gaps = 0: right_insertions = 0: open_right_insertions = 0, extend_right_insertions = 0; right_deletions = 0: open_right_deletions = 0, extend_right_deletions = 0. """, ) self.assertEqual(counts.left_insertions, 8) self.assertEqual(counts.left_deletions, 2) self.assertEqual(counts.right_insertions, 0) self.assertEqual(counts.right_deletions, 0) self.assertEqual(counts.internal_insertions, 14) self.assertEqual(counts.internal_deletions, 48) self.assertEqual(counts.left_gaps, 10) self.assertEqual(counts.right_gaps, 0) self.assertEqual(counts.internal_gaps, 62) self.assertEqual(counts.insertions, 22) self.assertEqual(counts.deletions, 50) self.assertEqual(counts.gaps, 72) self.assertEqual(counts.aligned, 904) self.assertEqual(counts.identities, 427) self.assertEqual(counts.mismatches, 477) self.assertEqual(counts.positives, 554) self.check_reading_writing(path) def test_empty(self): """Checking empty file.""" stream = StringIO() alignments = Align.parse(stream, "a2m") with self.assertRaises(ValueError) as cm: next(alignments) self.assertEqual(str(cm.exception), "Empty file.") if __name__ == "__main__": runner = unittest.TextTestRunner(verbosity=2) unittest.main(testRunner=runner)