Files
biopython/Tests/test_align.py
2025-01-15 18:38:28 +00:00

920 lines
66 KiB
Python

# Copyright 2000-2001 by Brad Chapman. All rights reserved.
# Revisions copyright 2007-2003 by Peter Cock. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Test alignment stuff.
Right now we've got tests for:
- Reading and Writing clustal format
- Reading and Writing fasta format
- Converting between formats
"""
# standard library
import os
import unittest
import warnings
from io import StringIO
from Bio import Align
from Bio import AlignIO
# biopython
from Bio import BiopythonDeprecationWarning
from Bio import motifs
from Bio.Align import AlignInfo
from Bio.Align import Alignment
from Bio.Align import MultipleSeqAlignment
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
class TestBasics(unittest.TestCase):
    """Basic checks on MultipleSeqAlignment and the Alignment derived from it."""

    def test_empty_alignment(self):
        """Check that an alignment built from no records reports zero size."""
        msa = MultipleSeqAlignment([])
        self.assertEqual(msa.get_alignment_length(), 0)
        self.assertEqual(len(msa), 0)
        # Repeat the size checks on the new-style Alignment object.
        new_style = msa.alignment
        self.assertEqual(new_style.length, 0)
        self.assertEqual(len(new_style), 0)

    def test_basic_alignment(self):
        """Build a three-row alignment and check its rows, columns and slices."""
        letters = "AbcDefGhiJklMnoPqrStuVwxYz"
        lowered = letters.lower()
        uppered = letters.upper()
        # (record id, sequence text) for the rows expected to remain.
        expected_rows = (
            ("mixed", letters),
            ("lower", lowered),
            ("upper", uppered),
        )
        # Column i is the i-th letter of each row, read top to bottom.
        expected_columns = [
            "".join(column) for column in zip(letters, lowered, uppered)
        ]

        msa = MultipleSeqAlignment([])
        for name, text in expected_rows:
            msa.append(SeqRecord(Seq(text), id=name))
        # A fourth record is appended and then removed again by index.
        msa.append(SeqRecord(Seq(letters), id="duplicate"))
        del msa[3]

        self.assertEqual(msa.get_alignment_length(), 26)
        self.assertEqual(len(msa), 3)
        for row, (_, text) in enumerate(expected_rows):
            self.assertEqual(msa[row].seq, text)
        for row, (name, _) in enumerate(expected_rows):
            self.assertEqual(msa[row].id, name)
        for col, column in enumerate(expected_columns):
            self.assertEqual(msa[:, col], column)
        # Rows addressed from the front and from the back.
        self.assertEqual(msa[0].id, "mixed")
        self.assertEqual(msa[-1].id, "upper")
        # Slicing the rows with a negative step gives a reversed sub-alignment.
        flipped = msa[::-1]
        self.assertIsInstance(flipped, MultipleSeqAlignment)
        self.assertEqual(flipped[0].id, "upper")
        self.assertEqual(flipped[2].id, "mixed")

        # The same checks on the new-style Alignment object.
        alignment = msa.alignment
        self.assertEqual(alignment.shape, (3, 26))
        self.assertEqual(len(alignment), 3)
        for row, (_, text) in enumerate(expected_rows):
            self.assertEqual(alignment.sequences[row].seq, text)
        for row, (name, _) in enumerate(expected_rows):
            self.assertEqual(alignment.sequences[row].id, name)
        for col, column in enumerate(expected_columns):
            self.assertEqual(alignment[:, col], column)
        # Indexing a row of the new-style object is compared with plain text.
        self.assertEqual(alignment[0], letters)
        self.assertEqual(alignment[-1], uppered)
        # Row slicing with a negative step again reverses the row order.
        flipped = alignment[::-1]
        self.assertIsInstance(flipped, Alignment)
        self.assertEqual(flipped.sequences[0].id, "upper")
        self.assertEqual(flipped.sequences[2].id, "mixed")
class TestReading(unittest.TestCase):
def test_read_clustal1(self):
    """Read opuntia.aln as Clustal and compare its Clustal and FASTA output.

    The file is parsed once with Bio.AlignIO and once with Bio.Align; the
    formatted text of each resulting object is compared with the literals
    defined below.
    """
    aln_path = os.path.join(os.getcwd(), "Clustalw", "opuntia.aln")
    clustal_header = """\
CLUSTAL X (1.81) multiple sequence alignment
"""
    clustal_body = """\
gi|6273285|gb|AF191659.1|AF191 TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAA
gi|6273284|gb|AF191658.1|AF191 TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAA
gi|6273287|gb|AF191661.1|AF191 TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAA
gi|6273286|gb|AF191660.1|AF191 TATACATAAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAA
gi|6273290|gb|AF191664.1|AF191 TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAA
gi|6273289|gb|AF191663.1|AF191 TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAA
gi|6273291|gb|AF191665.1|AF191 TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAA
******* **** *************************************
gi|6273285|gb|AF191659.1|AF191 TATATA----------ATATATTTCAAATTTCCTTATATACCCAAATATA
gi|6273284|gb|AF191658.1|AF191 TATATATA--------ATATATTTCAAATTTCCTTATATACCCAAATATA
gi|6273287|gb|AF191661.1|AF191 TATATA----------ATATATTTCAAATTTCCTTATATATCCAAATATA
gi|6273286|gb|AF191660.1|AF191 TATATA----------ATATATTTATAATTTCCTTATATATCCAAATATA
gi|6273290|gb|AF191664.1|AF191 TATATATATA------ATATATTTCAAATTCCCTTATATATCCAAATATA
gi|6273289|gb|AF191663.1|AF191 TATATATATA------ATATATTTCAAATTCCCTTATATATCCAAATATA
gi|6273291|gb|AF191665.1|AF191 TATATATATATATATAATATATTTCAAATTCCCTTATATATCCAAATATA
****** ******** **** ********* *********
gi|6273285|gb|AF191659.1|AF191 AAAATATCTAATAAATTAGATGAATATCAAAGAATCCATTGATTTAGTGT
gi|6273284|gb|AF191658.1|AF191 AAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGT
gi|6273287|gb|AF191661.1|AF191 AAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGT
gi|6273286|gb|AF191660.1|AF191 AAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGT
gi|6273290|gb|AF191664.1|AF191 AAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGT
gi|6273289|gb|AF191663.1|AF191 AAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTAT
gi|6273291|gb|AF191665.1|AF191 AAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGT
************************************ *********** *
gi|6273285|gb|AF191659.1|AF191 ACCAGA
gi|6273284|gb|AF191658.1|AF191 ACCAGA
gi|6273287|gb|AF191661.1|AF191 ACCAGA
gi|6273286|gb|AF191660.1|AF191 ACCAGA
gi|6273290|gb|AF191664.1|AF191 ACCAGA
gi|6273289|gb|AF191663.1|AF191 ACCAGA
gi|6273291|gb|AF191665.1|AF191 ACCAGA
******
"""  # noqa : W291
    fasta_wrapped = """\
>gi|6273285|gb|AF191659.1|AF191
TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATA----
------ATATATTTCAAATTTCCTTATATACCCAAATATAAAAATATCTAATAAATTAGA
TGAATATCAAAGAATCCATTGATTTAGTGTACCAGA
>gi|6273284|gb|AF191658.1|AF191
TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATA--
------ATATATTTCAAATTTCCTTATATACCCAAATATAAAAATATCTAATAAATTAGA
TGAATATCAAAGAATCTATTGATTTAGTGTACCAGA
>gi|6273287|gb|AF191661.1|AF191
TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATA----
------ATATATTTCAAATTTCCTTATATATCCAAATATAAAAATATCTAATAAATTAGA
TGAATATCAAAGAATCTATTGATTTAGTGTACCAGA
>gi|6273286|gb|AF191660.1|AF191
TATACATAAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATA----
------ATATATTTATAATTTCCTTATATATCCAAATATAAAAATATCTAATAAATTAGA
TGAATATCAAAGAATCTATTGATTTAGTGTACCAGA
>gi|6273290|gb|AF191664.1|AF191
TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATA
------ATATATTTCAAATTCCCTTATATATCCAAATATAAAAATATCTAATAAATTAGA
TGAATATCAAAGAATCTATTGATTTAGTGTACCAGA
>gi|6273289|gb|AF191663.1|AF191
TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATA
------ATATATTTCAAATTCCCTTATATATCCAAATATAAAAATATCTAATAAATTAGA
TGAATATCAAAGAATCTATTGATTTAGTATACCAGA
>gi|6273291|gb|AF191665.1|AF191
TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATA
TATATAATATATTTCAAATTCCCTTATATATCCAAATATAAAAATATCTAATAAATTAGA
TGAATATCAAAGAATCTATTGATTTAGTGTACCAGA
"""
    fasta_oneline = """\
>gi|6273285|gb|AF191659.1|AF191
TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATA----------ATATATTTCAAATTTCCTTATATACCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCCATTGATTTAGTGTACCAGA
>gi|6273284|gb|AF191658.1|AF191
TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATA--------ATATATTTCAAATTTCCTTATATACCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA
>gi|6273287|gb|AF191661.1|AF191
TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATA----------ATATATTTCAAATTTCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA
>gi|6273286|gb|AF191660.1|AF191
TATACATAAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATA----------ATATATTTATAATTTCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA
>gi|6273290|gb|AF191664.1|AF191
TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATA------ATATATTTCAAATTCCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA
>gi|6273289|gb|AF191663.1|AF191
TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATA------ATATATTTCAAATTCCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTATACCAGA
>gi|6273291|gb|AF191665.1|AF191
TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATATATATAATATATTTCAAATTCCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA
"""
    fasta_oneline_described = """\
>gi|6273285|gb|AF191659.1|AF191 gi|6273285|gb|AF191659.1|AF191
TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATA----------ATATATTTCAAATTTCCTTATATACCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCCATTGATTTAGTGTACCAGA
>gi|6273284|gb|AF191658.1|AF191 gi|6273284|gb|AF191658.1|AF191
TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATA--------ATATATTTCAAATTTCCTTATATACCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA
>gi|6273287|gb|AF191661.1|AF191 gi|6273287|gb|AF191661.1|AF191
TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATA----------ATATATTTCAAATTTCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA
>gi|6273286|gb|AF191660.1|AF191 gi|6273286|gb|AF191660.1|AF191
TATACATAAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATA----------ATATATTTATAATTTCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA
>gi|6273290|gb|AF191664.1|AF191 gi|6273290|gb|AF191664.1|AF191
TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATA------ATATATTTCAAATTCCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA
>gi|6273289|gb|AF191663.1|AF191 gi|6273289|gb|AF191663.1|AF191
TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATA------ATATATTTCAAATTCCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTATACCAGA
>gi|6273291|gb|AF191665.1|AF191 gi|6273291|gb|AF191665.1|AF191
TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATATATATAATATATTTCAAATTCCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA
"""
    # Old-style MultipleSeqAlignment: the Clustal text is header plus body,
    # and the FASTA text has each sequence split over several lines.
    msa = AlignIO.read(aln_path, "clustal")
    self.assertEqual(format(msa, "clustal"), clustal_header + clustal_body)
    self.assertEqual(format(msa, "fasta"), fasta_wrapped)
    # New-style Alignment derived from the MultipleSeqAlignment: its Clustal
    # text is compared with the body alone.
    converted = msa.alignment
    self.assertEqual(format(converted, "clustal"), clustal_body)
    # New-style Alignment objects write each FASTA sequence on one line.
    # Records parsed by Bio.AlignIO carry an ID and an identical
    # description, so both appear on the title line; records parsed by
    # Bio.Align carry an ID only.
    self.assertEqual(format(converted, "fasta"), fasta_oneline_described)
    parsed = Align.read(aln_path, "clustal")
    self.assertEqual(format(parsed, "fasta"), fasta_oneline)
def test_read_clustal2(self):
    """Read clustalw.aln as Clustal and compare its Clustal output.

    The MultipleSeqAlignment from Bio.AlignIO is formatted back to Clustal
    and compared with header plus body; the Alignment derived from it is
    compared with the body alone.
    """
    aln_path = os.path.join(os.curdir, "Clustalw", "clustalw.aln")
    clustal_header = """\
CLUSTAL X (1.81) multiple sequence alignment
"""
    clustal_body = """\
gi|4959044|gb|AAD34209.1|AF069 MENSDSNDKGSDQSAAQRRSQMDRLDREEAFYQFVNNLSEEDYRLMRDNN
gi|671626|emb|CAA85685.1| ---------MSPQTETKASVGFKAGVKEYKLTYYTPEYETKDTDILAAFR
* *: :: :. :* : :. : . :* :: .
gi|4959044|gb|AAD34209.1|AF069 LLGTPGESTEEELLRRLQQIKEGPPPQSPDENRAGESSDDVTNSDSIIDW
gi|671626|emb|CAA85685.1| VTPQPG-----------------VPPEEAGAAVAAESSTGT---------
: ** **:... *.*** ..
gi|4959044|gb|AAD34209.1|AF069 LNSVRQTGNTTRSRQRGNQSWRAVSRTNPNSGDFRFSLEINVNRNNGSQT
gi|671626|emb|CAA85685.1| WTTVWTDGLTSLDRYKG-----RCYHIEPVPG------------------
.:* * *: .* :* : :* .*
gi|4959044|gb|AAD34209.1|AF069 SENESEPSTRRLSVENMESSSQRQMENSASESASARPSRAERNSTEAVTE
gi|671626|emb|CAA85685.1| -EKDQCICYVAYPLDLFEEGSVTNMFTSIVGNVFGFKALRALRLEDLRIP
*::. . .:: :*..* :* .* .. . : . :
gi|4959044|gb|AAD34209.1|AF069 VPTTRAQRRARSRSPEHRRTRARAERSMSPLQPTSEIPRRAPTLEQSSEN
gi|671626|emb|CAA85685.1| VAYVKTFQGPPHGIQVERDKLNKYGRPLLGCTIKPKLGLSAKNYGRAVYE
*. .:: : . .* . : *.: ..:: * . :: :
gi|4959044|gb|AAD34209.1|AF069 EPEGSSRTRHHVTLRQQISGPELLGRGLFAASGSRNPSQGTSSSDTGSNS
gi|671626|emb|CAA85685.1| CLRGGLDFTKDDENVNSQPFMRWRDRFLFCAEAIYKAQAETGEIKGHYLN
.*. :. :. . . .* **.*.. :.. *.. . .
gi|4959044|gb|AAD34209.1|AF069 ESSGSGQRPPTIVLDLQVRRVRPGEYRQRDSIASRTRSRSQAPNNTVTYE
gi|671626|emb|CAA85685.1| ATAG-----------------------TCEEMIKRAIFARELGVPIVMHD
::* :.: .*: : * ::
gi|4959044|gb|AAD34209.1|AF069 SERGGFRRTFSRSERAGVRTYVSTIRIPIRRILNTGLSETTSVAIQTMLR
gi|671626|emb|CAA85685.1| YLTGGFTANTSLAHYCRDNGLLLHIHRAMHAVIDRQKNHGMHFRVLAKAL
*** . * :. . . : *: .:: ::: .. . : :
gi|4959044|gb|AAD34209.1|AF069 QIMTGFGELSYFMYSDSDSEPSASVSSRNVERVESRNGRGSSGGGNSSGS
gi|671626|emb|CAA85685.1| RLSGGDHIHSGTVVGKLEGERDITLGFVDLLRDDFIEKDRSRGIYFTQDW
:: * * : .. :.* . ::. :: * : : * * :..
gi|4959044|gb|AAD34209.1|AF069 SSSSSPSPSSSGESSESSSKMFEGSSEGGSSGPSRKDGRHRAPVTFDESG
gi|671626|emb|CAA85685.1| VSLPGVIPVASG-----------------------------GIHVWHMPA
* .. * :** . .:. ..
gi|4959044|gb|AAD34209.1|AF069 SLPFFSLAQFFLLNEDDEDQPRGLTKEQIDNLAMRSFGENDALKTCSVCI
gi|671626|emb|CAA85685.1| LTEIFGDDSVLQFGGGTLGHPWGNAPGAVANRVA-----------VEACV
:*. ..: :. . .:* * : : * . ..*:
gi|4959044|gb|AAD34209.1|AF069 TEYTEGDKLRKLPCSHEFHVHCIDRWLSE-NSTCPICRRAVLSSGNRESV
gi|671626|emb|CAA85685.1| KARNEG---RDLAAEGNAIIREACKWSPELAAACEVWKEIKFEFPAMD--
. .** *.*... : :: :* .* ::* : :. :. :
gi|4959044|gb|AAD34209.1|AF069 V
gi|671626|emb|CAA85685.1| -
"""  # noqa : W291
    # Old-style MultipleSeqAlignment: Clustal text is header plus body.
    msa = AlignIO.read(aln_path, "clustal")
    self.assertEqual(format(msa, "clustal"), clustal_header + clustal_body)
    # New-style Alignment derived from it: compared with the body alone.
    converted = msa.alignment
    self.assertEqual(format(converted, "clustal"), clustal_body)
def test_read_write_clustal(self):
"""Test the base alignment stuff."""
path = os.path.join(os.getcwd(), "Clustalw", "opuntia.aln")
msa = AlignIO.read(path, "clustal")
self.assertEqual(len(msa), 7)
seq_record = msa[0]
self.assertEqual(seq_record.description, "gi|6273285|gb|AF191659.1|AF191")
self.assertEqual(
seq_record.seq,
Seq(
"TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATA----------ATATATTTCAAATTTCCTTATATACCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCCATTGATTTAGTGTACCAGA"
),
)
seq_record = msa[1]
self.assertEqual(seq_record.description, "gi|6273284|gb|AF191658.1|AF191")
self.assertEqual(
seq_record.seq,
"TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATA--------ATATATTTCAAATTTCCTTATATACCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA",
)
seq_record = msa[2]
self.assertEqual(seq_record.description, "gi|6273287|gb|AF191661.1|AF191")
self.assertEqual(
seq_record.seq,
"TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATA----------ATATATTTCAAATTTCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA",
)
seq_record = msa[3]
self.assertEqual(seq_record.description, "gi|6273286|gb|AF191660.1|AF191")
self.assertEqual(
seq_record.seq,
"TATACATAAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATA----------ATATATTTATAATTTCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA",
)
seq_record = msa[4]
self.assertEqual(seq_record.description, "gi|6273290|gb|AF191664.1|AF191")
self.assertEqual(
seq_record.seq,
"TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATA------ATATATTTCAAATTCCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA",
)
seq_record = msa[5]
self.assertEqual(seq_record.description, "gi|6273289|gb|AF191663.1|AF191")
self.assertEqual(
seq_record.seq,
"TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATA------ATATATTTCAAATTCCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTATACCAGA",
)
seq_record = msa[6]
self.assertEqual(seq_record.description, "gi|6273291|gb|AF191665.1|AF191")
self.assertEqual(
seq_record.seq,
"TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATATATATAATATATTTCAAATTCCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA",
)
self.assertEqual(msa.get_alignment_length(), 156)
alignment = msa.alignment
dictionary = alignment.substitutions
self.assertEqual(len(dictionary), 4)
self.assertEqual(dictionary.shape, (4, 4))
self.assertEqual(len(dictionary.keys()), 16)
self.assertAlmostEqual(dictionary[("A", "A")], 1395)
self.assertAlmostEqual(dictionary[("A", "C")], 3)
self.assertAlmostEqual(dictionary[("A", "G")], 13)
self.assertAlmostEqual(dictionary[("A", "T")], 6)
self.assertAlmostEqual(dictionary[("C", "A")], 3)
self.assertAlmostEqual(dictionary[("C", "C")], 271)
self.assertAlmostEqual(dictionary[("C", "G")], 0)
self.assertAlmostEqual(dictionary[("C", "T")], 16)
self.assertAlmostEqual(dictionary[("G", "A")], 5)
self.assertAlmostEqual(dictionary[("G", "C")], 0)
self.assertAlmostEqual(dictionary[("G", "G")], 480)
self.assertAlmostEqual(dictionary[("G", "T")], 0)
self.assertAlmostEqual(dictionary[("T", "A")], 6)
self.assertAlmostEqual(dictionary[("T", "C")], 12)
self.assertAlmostEqual(dictionary[("T", "G")], 0)
self.assertAlmostEqual(dictionary[("T", "T")], 874)
motif = motifs.Motif("ACGT", alignment)
counts = motif.counts
self.assertEqual(
counts.calculate_consensus(identity=0.7),
"TATACATTAAAGNAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATATATATAATATATTTCAAATTNCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA",
)
self.assertEqual(
str(counts),
"""\
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
A: 0.00 7.00 0.00 7.00 0.00 7.00 0.00 1.00 7.00 7.00 7.00 0.00 4.00 7.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 7.00 7.00 7.00 0.00 0.00 0.00 7.00 7.00 7.00 0.00 0.00 0.00 0.00 7.00 7.00 7.00 0.00 7.00 7.00 7.00 0.00 7.00 7.00 0.00 7.00 0.00 7.00 0.00 7.00 0.00 4.00 0.00 3.00 0.00 1.00 0.00 1.00 0.00 1.00 7.00 0.00 7.00 0.00 7.00 0.00 0.00 0.00 1.00 6.00 7.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 7.00 0.00 7.00 0.00 0.00 0.00 7.00 7.00 7.00 0.00 7.00 0.00 7.00 7.00 7.00 7.00 7.00 0.00 7.00 0.00 0.00 0.00 7.00 7.00 0.00 7.00 7.00 7.00 0.00 0.00 7.00 0.00 7.00 0.00 0.00 7.00 7.00 0.00 7.00 0.00 0.00 7.00 7.00 7.00 0.00 7.00 7.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 7.00 0.00 0.00 1.00 0.00 7.00 0.00 0.00 7.00 0.00 7.00
C: 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 6.00 0.00 0.00 0.00 0.00 0.00 3.00 7.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 2.00 7.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 7.00 0.00 0.00 0.00
G: 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 3.00 0.00 7.00 7.00 7.00 7.00 7.00 0.00 0.00 7.00 0.00 7.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 7.00 0.00 0.00 0.00 7.00 7.00 0.00 7.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 6.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00
T: 7.00 0.00 7.00 0.00 0.00 0.00 7.00 6.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 7.00 0.00 7.00 0.00 4.00 0.00 3.00 0.00 1.00 0.00 1.00 0.00 1.00 0.00 0.00 7.00 0.00 7.00 0.00 7.00 7.00 7.00 0.00 1.00 0.00 0.00 7.00 7.00 4.00 0.00 0.00 7.00 7.00 0.00 7.00 0.00 7.00 0.00 5.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 7.00 0.00 7.00 0.00 0.00 7.00 0.00 0.00 0.00 7.00 7.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 7.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 6.00 0.00 7.00 7.00 0.00 0.00 7.00 7.00 7.00 0.00 0.00 7.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00
""",
)
alignment = msa.alignment
motif = motifs.Motif("ACGT", alignment)
counts = motif.counts
self.assertEqual(
str(counts),
"""\
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
A: 0.00 7.00 0.00 7.00 0.00 7.00 0.00 1.00 7.00 7.00 7.00 0.00 4.00 7.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 7.00 7.00 7.00 0.00 0.00 0.00 7.00 7.00 7.00 0.00 0.00 0.00 0.00 7.00 7.00 7.00 0.00 7.00 7.00 7.00 0.00 7.00 7.00 0.00 7.00 0.00 7.00 0.00 7.00 0.00 4.00 0.00 3.00 0.00 1.00 0.00 1.00 0.00 1.00 7.00 0.00 7.00 0.00 7.00 0.00 0.00 0.00 1.00 6.00 7.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 7.00 0.00 7.00 0.00 0.00 0.00 7.00 7.00 7.00 0.00 7.00 0.00 7.00 7.00 7.00 7.00 7.00 0.00 7.00 0.00 0.00 0.00 7.00 7.00 0.00 7.00 7.00 7.00 0.00 0.00 7.00 0.00 7.00 0.00 0.00 7.00 7.00 0.00 7.00 0.00 0.00 7.00 7.00 7.00 0.00 7.00 7.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 7.00 0.00 0.00 1.00 0.00 7.00 0.00 0.00 7.00 0.00 7.00
C: 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 6.00 0.00 0.00 0.00 0.00 0.00 3.00 7.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 2.00 7.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 7.00 0.00 0.00 0.00
G: 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 3.00 0.00 7.00 7.00 7.00 7.00 7.00 0.00 0.00 7.00 0.00 7.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 7.00 0.00 0.00 0.00 7.00 7.00 0.00 7.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 6.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00
T: 7.00 0.00 7.00 0.00 0.00 0.00 7.00 6.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 7.00 0.00 7.00 0.00 4.00 0.00 3.00 0.00 1.00 0.00 1.00 0.00 1.00 0.00 0.00 7.00 0.00 7.00 0.00 7.00 7.00 7.00 0.00 1.00 0.00 0.00 7.00 7.00 4.00 0.00 0.00 7.00 7.00 0.00 7.00 0.00 7.00 0.00 5.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 7.00 0.00 7.00 0.00 0.00 7.00 0.00 0.00 0.00 7.00 7.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 7.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 6.00 0.00 7.00 7.00 0.00 0.00 7.00 7.00 7.00 0.00 0.00 7.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00
""",
)
align_info = AlignInfo.SummaryInfo(msa)
self.assertEqual(align_info.get_column(1), "AAAAAAA")
self.assertEqual(align_info.get_column(7), "TTTATTT")
alignment = msa.alignment
motif = motifs.Motif("ACGT", alignment)
counts = motif.counts
self.assertEqual(
str(counts),
"""\
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
A: 0.00 7.00 0.00 7.00 0.00 7.00 0.00 1.00 7.00 7.00 7.00 0.00 4.00 7.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 7.00 7.00 7.00 0.00 0.00 0.00 7.00 7.00 7.00 0.00 0.00 0.00 0.00 7.00 7.00 7.00 0.00 7.00 7.00 7.00 0.00 7.00 7.00 0.00 7.00 0.00 7.00 0.00 7.00 0.00 4.00 0.00 3.00 0.00 1.00 0.00 1.00 0.00 1.00 7.00 0.00 7.00 0.00 7.00 0.00 0.00 0.00 1.00 6.00 7.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 7.00 0.00 7.00 0.00 0.00 0.00 7.00 7.00 7.00 0.00 7.00 0.00 7.00 7.00 7.00 7.00 7.00 0.00 7.00 0.00 0.00 0.00 7.00 7.00 0.00 7.00 7.00 7.00 0.00 0.00 7.00 0.00 7.00 0.00 0.00 7.00 7.00 0.00 7.00 0.00 0.00 7.00 7.00 7.00 0.00 7.00 7.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 7.00 0.00 0.00 1.00 0.00 7.00 0.00 0.00 7.00 0.00 7.00
C: 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 6.00 0.00 0.00 0.00 0.00 0.00 3.00 7.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 2.00 7.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 7.00 0.00 0.00 0.00
G: 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 3.00 0.00 7.00 7.00 7.00 7.00 7.00 0.00 0.00 7.00 0.00 7.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 7.00 0.00 0.00 0.00 7.00 7.00 0.00 7.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 6.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00
T: 7.00 0.00 7.00 0.00 0.00 0.00 7.00 6.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 7.00 0.00 7.00 0.00 4.00 0.00 3.00 0.00 1.00 0.00 1.00 0.00 1.00 0.00 0.00 7.00 0.00 7.00 0.00 7.00 7.00 7.00 0.00 1.00 0.00 0.00 7.00 7.00 4.00 0.00 0.00 7.00 7.00 0.00 7.00 0.00 7.00 0.00 5.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 7.00 0.00 7.00 0.00 0.00 7.00 0.00 0.00 0.00 7.00 7.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 7.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 6.00 0.00 7.00 7.00 0.00 0.00 7.00 7.00 7.00 0.00 0.00 7.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00
""",
)
value = sum(motif[5:50].relative_entropy)
self.assertAlmostEqual(value, 88.42309908538343) # Alignment
relative_entropy = motif.relative_entropy
value = sum(relative_entropy)
self.assertAlmostEqual(value, 306.2080592664532) # Alignment
self.assertAlmostEqual(relative_entropy[0], 2.0)
self.assertAlmostEqual(relative_entropy[1], 2.0)
self.assertAlmostEqual(relative_entropy[2], 2.0)
self.assertAlmostEqual(relative_entropy[3], 2.0)
self.assertAlmostEqual(relative_entropy[4], 2.0)
self.assertAlmostEqual(relative_entropy[5], 2.0)
self.assertAlmostEqual(relative_entropy[6], 2.0)
self.assertAlmostEqual(relative_entropy[7], 1.4083272214176723)
self.assertAlmostEqual(relative_entropy[8], 2.0)
self.assertAlmostEqual(relative_entropy[9], 2.0)
self.assertAlmostEqual(relative_entropy[10], 2.0)
self.assertAlmostEqual(relative_entropy[11], 2.0)
self.assertAlmostEqual(relative_entropy[12], 1.0147718639657484)
self.assertAlmostEqual(relative_entropy[13], 2.0)
self.assertAlmostEqual(relative_entropy[14], 2.0)
self.assertAlmostEqual(relative_entropy[15], 2.0)
self.assertAlmostEqual(relative_entropy[16], 2.0)
self.assertAlmostEqual(relative_entropy[17], 2.0)
self.assertAlmostEqual(relative_entropy[18], 2.0)
self.assertAlmostEqual(relative_entropy[19], 2.0)
self.assertAlmostEqual(relative_entropy[20], 2.0)
self.assertAlmostEqual(relative_entropy[21], 2.0)
self.assertAlmostEqual(relative_entropy[22], 2.0)
self.assertAlmostEqual(relative_entropy[23], 2.0)
self.assertAlmostEqual(relative_entropy[24], 2.0)
self.assertAlmostEqual(relative_entropy[25], 2.0)
self.assertAlmostEqual(relative_entropy[26], 2.0)
self.assertAlmostEqual(relative_entropy[27], 2.0)
self.assertAlmostEqual(relative_entropy[28], 2.0)
self.assertAlmostEqual(relative_entropy[29], 2.0)
self.assertAlmostEqual(relative_entropy[30], 2.0)
self.assertAlmostEqual(relative_entropy[31], 2.0)
self.assertAlmostEqual(relative_entropy[32], 2.0)
self.assertAlmostEqual(relative_entropy[33], 2.0)
self.assertAlmostEqual(relative_entropy[34], 2.0)
self.assertAlmostEqual(relative_entropy[35], 2.0)
self.assertAlmostEqual(relative_entropy[36], 2.0)
self.assertAlmostEqual(relative_entropy[37], 2.0)
self.assertAlmostEqual(relative_entropy[38], 2.0)
self.assertAlmostEqual(relative_entropy[39], 2.0)
self.assertAlmostEqual(relative_entropy[40], 2.0)
self.assertAlmostEqual(relative_entropy[41], 2.0)
self.assertAlmostEqual(relative_entropy[42], 2.0)
self.assertAlmostEqual(relative_entropy[43], 2.0)
self.assertAlmostEqual(relative_entropy[44], 2.0)
self.assertAlmostEqual(relative_entropy[45], 2.0)
self.assertAlmostEqual(relative_entropy[46], 2.0)
self.assertAlmostEqual(relative_entropy[47], 2.0)
self.assertAlmostEqual(relative_entropy[48], 2.0)
self.assertAlmostEqual(relative_entropy[49], 2.0)
self.assertAlmostEqual(relative_entropy[50], 2.0)
self.assertAlmostEqual(relative_entropy[51], 2.0)
self.assertAlmostEqual(relative_entropy[52], 2.0)
self.assertAlmostEqual(relative_entropy[53], 2.0)
self.assertAlmostEqual(relative_entropy[54], 2.0)
self.assertAlmostEqual(relative_entropy[55], 2.0)
self.assertAlmostEqual(relative_entropy[56], 2.0)
self.assertAlmostEqual(relative_entropy[57], 2.0)
self.assertAlmostEqual(relative_entropy[58], 2.0)
self.assertAlmostEqual(relative_entropy[59], 2.0)
self.assertAlmostEqual(relative_entropy[60], 2.0)
self.assertAlmostEqual(relative_entropy[61], 2.0)
self.assertAlmostEqual(relative_entropy[62], 2.0)
self.assertAlmostEqual(relative_entropy[63], 2.0)
self.assertAlmostEqual(relative_entropy[64], 2.0)
self.assertAlmostEqual(relative_entropy[65], 2.0)
self.assertAlmostEqual(relative_entropy[66], 2.0)
self.assertAlmostEqual(relative_entropy[67], 2.0)
self.assertAlmostEqual(relative_entropy[68], 2.0)
self.assertAlmostEqual(relative_entropy[69], 2.0)
self.assertAlmostEqual(relative_entropy[70], 2.0)
self.assertAlmostEqual(relative_entropy[71], 2.0)
self.assertAlmostEqual(relative_entropy[72], 2.0)
self.assertAlmostEqual(relative_entropy[73], 2.0)
self.assertAlmostEqual(relative_entropy[74], 1.4083272214176723)
self.assertAlmostEqual(relative_entropy[75], 1.4083272214176723)
self.assertAlmostEqual(relative_entropy[76], 2.0)
self.assertAlmostEqual(relative_entropy[77], 2.0)
self.assertAlmostEqual(relative_entropy[78], 2.0)
self.assertAlmostEqual(relative_entropy[79], 2.0)
self.assertAlmostEqual(relative_entropy[80], 1.0147718639657484)
self.assertAlmostEqual(relative_entropy[81], 2.0)
self.assertAlmostEqual(relative_entropy[82], 2.0)
self.assertAlmostEqual(relative_entropy[83], 2.0)
self.assertAlmostEqual(relative_entropy[84], 2.0)
self.assertAlmostEqual(relative_entropy[85], 2.0)
self.assertAlmostEqual(relative_entropy[86], 2.0)
self.assertAlmostEqual(relative_entropy[87], 2.0)
self.assertAlmostEqual(relative_entropy[88], 2.0)
self.assertAlmostEqual(relative_entropy[89], 2.0)
self.assertAlmostEqual(relative_entropy[90], 1.136879431433369)
self.assertAlmostEqual(relative_entropy[91], 2.0)
self.assertAlmostEqual(relative_entropy[92], 2.0)
self.assertAlmostEqual(relative_entropy[93], 2.0)
self.assertAlmostEqual(relative_entropy[94], 2.0)
self.assertAlmostEqual(relative_entropy[95], 2.0)
self.assertAlmostEqual(relative_entropy[96], 2.0)
self.assertAlmostEqual(relative_entropy[97], 2.0)
self.assertAlmostEqual(relative_entropy[98], 2.0)
self.assertAlmostEqual(relative_entropy[99], 2.0)
self.assertAlmostEqual(relative_entropy[100], 2.0)
self.assertAlmostEqual(relative_entropy[101], 2.0)
self.assertAlmostEqual(relative_entropy[102], 2.0)
self.assertAlmostEqual(relative_entropy[103], 2.0)
self.assertAlmostEqual(relative_entropy[104], 2.0)
self.assertAlmostEqual(relative_entropy[105], 2.0)
self.assertAlmostEqual(relative_entropy[106], 2.0)
self.assertAlmostEqual(relative_entropy[107], 2.0)
self.assertAlmostEqual(relative_entropy[108], 2.0)
self.assertAlmostEqual(relative_entropy[109], 2.0)
self.assertAlmostEqual(relative_entropy[110], 2.0)
self.assertAlmostEqual(relative_entropy[111], 2.0)
self.assertAlmostEqual(relative_entropy[112], 2.0)
self.assertAlmostEqual(relative_entropy[113], 2.0)
self.assertAlmostEqual(relative_entropy[114], 2.0)
self.assertAlmostEqual(relative_entropy[115], 2.0)
self.assertAlmostEqual(relative_entropy[116], 2.0)
self.assertAlmostEqual(relative_entropy[117], 2.0)
self.assertAlmostEqual(relative_entropy[118], 2.0)
self.assertAlmostEqual(relative_entropy[119], 2.0)
self.assertAlmostEqual(relative_entropy[120], 2.0)
self.assertAlmostEqual(relative_entropy[121], 2.0)
self.assertAlmostEqual(relative_entropy[122], 2.0)
self.assertAlmostEqual(relative_entropy[123], 2.0)
self.assertAlmostEqual(relative_entropy[124], 2.0)
self.assertAlmostEqual(relative_entropy[125], 2.0)
self.assertAlmostEqual(relative_entropy[126], 2.0)
self.assertAlmostEqual(relative_entropy[127], 2.0)
self.assertAlmostEqual(relative_entropy[128], 2.0)
self.assertAlmostEqual(relative_entropy[129], 2.0)
self.assertAlmostEqual(relative_entropy[130], 2.0)
self.assertAlmostEqual(relative_entropy[131], 2.0)
self.assertAlmostEqual(relative_entropy[132], 2.0)
self.assertAlmostEqual(relative_entropy[133], 2.0)
self.assertAlmostEqual(relative_entropy[134], 2.0)
self.assertAlmostEqual(relative_entropy[135], 2.0)
self.assertAlmostEqual(relative_entropy[136], 1.4083272214176723)
self.assertAlmostEqual(relative_entropy[137], 2.0)
self.assertAlmostEqual(relative_entropy[138], 2.0)
self.assertAlmostEqual(relative_entropy[139], 2.0)
self.assertAlmostEqual(relative_entropy[140], 2.0)
self.assertAlmostEqual(relative_entropy[141], 2.0)
self.assertAlmostEqual(relative_entropy[142], 2.0)
self.assertAlmostEqual(relative_entropy[143], 2.0)
self.assertAlmostEqual(relative_entropy[144], 2.0)
self.assertAlmostEqual(relative_entropy[145], 2.0)
self.assertAlmostEqual(relative_entropy[146], 2.0)
self.assertAlmostEqual(relative_entropy[147], 2.0)
self.assertAlmostEqual(relative_entropy[148], 1.4083272214176723)
self.assertAlmostEqual(relative_entropy[149], 2.0)
self.assertAlmostEqual(relative_entropy[150], 2.0)
self.assertAlmostEqual(relative_entropy[151], 2.0)
self.assertAlmostEqual(relative_entropy[152], 2.0)
self.assertAlmostEqual(relative_entropy[153], 2.0)
self.assertAlmostEqual(relative_entropy[154], 2.0)
self.assertAlmostEqual(relative_entropy[155], 2.0)
# create a new-style Alignment object
del seq_record
del align_info
del dictionary
del value
alignment = msa.alignment
self.assertEqual(len(alignment), 7)
seq_record = alignment.sequences[0]
self.assertEqual(seq_record.description, "gi|6273285|gb|AF191659.1|AF191")
self.assertEqual(
alignment[0],
"TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATA----------ATATATTTCAAATTTCCTTATATACCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCCATTGATTTAGTGTACCAGA",
)
self.assertEqual(
seq_record.seq,
Seq(
"TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATAATATATTTCAAATTTCCTTATATACCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCCATTGATTTAGTGTACCAGA"
),
)
seq_record = alignment.sequences[1]
self.assertEqual(seq_record.description, "gi|6273284|gb|AF191658.1|AF191")
self.assertEqual(
alignment[1],
"TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATA--------ATATATTTCAAATTTCCTTATATACCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA",
)
self.assertEqual(
seq_record.seq,
"TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATAATATATTTCAAATTTCCTTATATACCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA",
)
seq_record = alignment.sequences[2]
self.assertEqual(seq_record.description, "gi|6273287|gb|AF191661.1|AF191")
self.assertEqual(
alignment[2],
"TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATA----------ATATATTTCAAATTTCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA",
)
self.assertEqual(
seq_record.seq,
"TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATAATATATTTCAAATTTCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA",
)
seq_record = alignment.sequences[3]
self.assertEqual(seq_record.description, "gi|6273286|gb|AF191660.1|AF191")
self.assertEqual(
alignment[3],
"TATACATAAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATA----------ATATATTTATAATTTCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA",
)
self.assertEqual(
seq_record.seq,
"TATACATAAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATAATATATTTATAATTTCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA",
)
seq_record = alignment.sequences[4]
self.assertEqual(seq_record.description, "gi|6273290|gb|AF191664.1|AF191")
self.assertEqual(
alignment[4],
"TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATA------ATATATTTCAAATTCCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA",
)
self.assertEqual(
seq_record.seq,
"TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATAATATATTTCAAATTCCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA",
)
seq_record = alignment.sequences[5]
self.assertEqual(seq_record.description, "gi|6273289|gb|AF191663.1|AF191")
self.assertEqual(
alignment[5],
"TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATA------ATATATTTCAAATTCCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTATACCAGA",
)
self.assertEqual(
seq_record.seq,
"TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATAATATATTTCAAATTCCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTATACCAGA",
)
seq_record = alignment.sequences[6]
self.assertEqual(seq_record.description, "gi|6273291|gb|AF191665.1|AF191")
self.assertEqual(
alignment[6],
"TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATATATATAATATATTTCAAATTCCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA",
)
self.assertEqual(
seq_record.seq,
"TATACATTAAAGGAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATATATATAATATATTTCAAATTCCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA",
)
self.assertEqual(alignment.shape, (7, 156))
substitutions = alignment.substitutions
self.assertEqual(len(substitutions), 4)
self.assertEqual(substitutions.shape, (4, 4))
self.assertAlmostEqual(substitutions[("A", "A")], 1395)
self.assertAlmostEqual(substitutions[("A", "C")], 3)
self.assertAlmostEqual(substitutions[("A", "G")], 13)
self.assertAlmostEqual(substitutions[("A", "T")], 6)
self.assertAlmostEqual(substitutions[("C", "A")], 3)
self.assertAlmostEqual(substitutions[("C", "C")], 271)
self.assertAlmostEqual(substitutions[("C", "G")], 0)
self.assertAlmostEqual(substitutions[("C", "T")], 16)
self.assertAlmostEqual(substitutions[("G", "A")], 5)
self.assertAlmostEqual(substitutions[("G", "C")], 0)
self.assertAlmostEqual(substitutions[("G", "G")], 480)
self.assertAlmostEqual(substitutions[("G", "T")], 0)
self.assertAlmostEqual(substitutions[("T", "A")], 6)
self.assertAlmostEqual(substitutions[("T", "C")], 12)
self.assertAlmostEqual(substitutions[("T", "G")], 0)
self.assertAlmostEqual(substitutions[("T", "T")], 874)
motif = motifs.Motif(alphabet="ACGT", alignment=alignment)
self.assertEqual(
motif.consensus,
"TATACATTAAAGAAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATATATATAATATATTTCAAATTTCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA",
)
self.assertEqual(
motif.degenerate_consensus,
"TATACATTAAAGRAGGGGGATGCGGATAAATGGAAAGGCGAAAGAAAGAATATATATATATATATAATATATTTCAAATTYCCTTATATATCCAAATATAAAAATATCTAATAAATTAGATGAATATCAAAGAATCTATTGATTTAGTGTACCAGA",
)
matrix = motif.counts
self.assertEqual(
str(matrix),
"""\
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
A: 0.00 7.00 0.00 7.00 0.00 7.00 0.00 1.00 7.00 7.00 7.00 0.00 4.00 7.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 7.00 7.00 7.00 0.00 0.00 0.00 7.00 7.00 7.00 0.00 0.00 0.00 0.00 7.00 7.00 7.00 0.00 7.00 7.00 7.00 0.00 7.00 7.00 0.00 7.00 0.00 7.00 0.00 7.00 0.00 4.00 0.00 3.00 0.00 1.00 0.00 1.00 0.00 1.00 7.00 0.00 7.00 0.00 7.00 0.00 0.00 0.00 1.00 6.00 7.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 7.00 0.00 7.00 0.00 0.00 0.00 7.00 7.00 7.00 0.00 7.00 0.00 7.00 7.00 7.00 7.00 7.00 0.00 7.00 0.00 0.00 0.00 7.00 7.00 0.00 7.00 7.00 7.00 0.00 0.00 7.00 0.00 7.00 0.00 0.00 7.00 7.00 0.00 7.00 0.00 0.00 7.00 7.00 7.00 0.00 7.00 7.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 7.00 0.00 0.00 1.00 0.00 7.00 0.00 0.00 7.00 0.00 7.00
C: 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 6.00 0.00 0.00 0.00 0.00 0.00 3.00 7.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 2.00 7.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 7.00 0.00 0.00 0.00
G: 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 3.00 0.00 7.00 7.00 7.00 7.00 7.00 0.00 0.00 7.00 0.00 7.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 7.00 0.00 0.00 0.00 7.00 7.00 0.00 7.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 6.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00
T: 7.00 0.00 7.00 0.00 0.00 0.00 7.00 6.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 7.00 0.00 7.00 0.00 4.00 0.00 3.00 0.00 1.00 0.00 1.00 0.00 1.00 0.00 0.00 7.00 0.00 7.00 0.00 7.00 7.00 7.00 0.00 1.00 0.00 0.00 7.00 7.00 4.00 0.00 0.00 7.00 7.00 0.00 7.00 0.00 7.00 0.00 5.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 7.00 0.00 7.00 0.00 0.00 7.00 0.00 0.00 0.00 7.00 7.00 0.00 0.00 0.00 7.00 0.00 0.00 0.00 7.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7.00 0.00 6.00 0.00 7.00 7.00 0.00 0.00 7.00 7.00 7.00 0.00 0.00 7.00 0.00 7.00 0.00 0.00 0.00 0.00 0.00 0.00
""",
)
self.assertEqual(
format(motif, "transfac"),
"""\
P0 A C G T
01 0 0 0 7 T
02 7 0 0 0 A
03 0 0 0 7 T
04 7 0 0 0 A
05 0 7 0 0 C
06 7 0 0 0 A
07 0 0 0 7 T
08 1 0 0 6 T
09 7 0 0 0 A
10 7 0 0 0 A
11 7 0 0 0 A
12 0 0 7 0 G
13 4 0 3 0 R
14 7 0 0 0 A
15 0 0 7 0 G
16 0 0 7 0 G
17 0 0 7 0 G
18 0 0 7 0 G
19 0 0 7 0 G
20 7 0 0 0 A
21 0 0 0 7 T
22 0 0 7 0 G
23 0 7 0 0 C
24 0 0 7 0 G
25 0 0 7 0 G
26 7 0 0 0 A
27 0 0 0 7 T
28 7 0 0 0 A
29 7 0 0 0 A
30 7 0 0 0 A
31 0 0 0 7 T
32 0 0 7 0 G
33 0 0 7 0 G
34 7 0 0 0 A
35 7 0 0 0 A
36 7 0 0 0 A
37 0 0 7 0 G
38 0 0 7 0 G
39 0 7 0 0 C
40 0 0 7 0 G
41 7 0 0 0 A
42 7 0 0 0 A
43 7 0 0 0 A
44 0 0 7 0 G
45 7 0 0 0 A
46 7 0 0 0 A
47 7 0 0 0 A
48 0 0 7 0 G
49 7 0 0 0 A
50 7 0 0 0 A
51 0 0 0 7 T
52 7 0 0 0 A
53 0 0 0 7 T
54 7 0 0 0 A
55 0 0 0 7 T
56 7 0 0 0 A
57 0 0 0 4 T
58 4 0 0 0 A
59 0 0 0 3 T
60 3 0 0 0 A
61 0 0 0 1 T
62 1 0 0 0 A
63 0 0 0 1 T
64 1 0 0 0 A
65 0 0 0 1 T
66 1 0 0 0 A
67 7 0 0 0 A
68 0 0 0 7 T
69 7 0 0 0 A
70 0 0 0 7 T
71 7 0 0 0 A
72 0 0 0 7 T
73 0 0 0 7 T
74 0 0 0 7 T
75 1 6 0 0 C
76 6 0 0 1 A
77 7 0 0 0 A
78 7 0 0 0 A
79 0 0 0 7 T
80 0 0 0 7 T
81 0 3 0 4 Y
82 0 7 0 0 C
83 0 7 0 0 C
84 0 0 0 7 T
85 0 0 0 7 T
86 7 0 0 0 A
87 0 0 0 7 T
88 7 0 0 0 A
89 0 0 0 7 T
90 7 0 0 0 A
91 0 2 0 5 T
92 0 7 0 0 C
93 0 7 0 0 C
94 7 0 0 0 A
95 7 0 0 0 A
96 7 0 0 0 A
97 0 0 0 7 T
98 7 0 0 0 A
99 0 0 0 7 T
100 7 0 0 0 A
101 7 0 0 0 A
102 7 0 0 0 A
103 7 0 0 0 A
104 7 0 0 0 A
105 0 0 0 7 T
106 7 0 0 0 A
107 0 0 0 7 T
108 0 7 0 0 C
109 0 0 0 7 T
110 7 0 0 0 A
111 7 0 0 0 A
112 0 0 0 7 T
113 7 0 0 0 A
114 7 0 0 0 A
115 7 0 0 0 A
116 0 0 0 7 T
117 0 0 0 7 T
118 7 0 0 0 A
119 0 0 7 0 G
120 7 0 0 0 A
121 0 0 0 7 T
122 0 0 7 0 G
123 7 0 0 0 A
124 7 0 0 0 A
125 0 0 0 7 T
126 7 0 0 0 A
127 0 0 0 7 T
128 0 7 0 0 C
129 7 0 0 0 A
130 7 0 0 0 A
131 7 0 0 0 A
132 0 0 7 0 G
133 7 0 0 0 A
134 7 0 0 0 A
135 0 0 0 7 T
136 0 7 0 0 C
137 0 1 0 6 T
138 7 0 0 0 A
139 0 0 0 7 T
140 0 0 0 7 T
141 0 0 7 0 G
142 7 0 0 0 A
143 0 0 0 7 T
144 0 0 0 7 T
145 0 0 0 7 T
146 7 0 0 0 A
147 0 0 7 0 G
148 0 0 0 7 T
149 1 0 6 0 G
150 0 0 0 7 T
151 7 0 0 0 A
152 0 7 0 0 C
153 0 7 0 0 C
154 7 0 0 0 A
155 0 0 7 0 G
156 7 0 0 0 A
XX
//
""",
)
self.assertAlmostEqual(sum(motif[5:50].relative_entropy), 88.42309908538343)
relative_entropy = motif.relative_entropy
self.assertAlmostEqual(sum(relative_entropy[5:50]), 88.42309908538343)
self.assertAlmostEqual(sum(relative_entropy), 306.20805926645323)
self.assertEqual(alignment[:, 1], "AAAAAAA")
self.assertAlmostEqual(motif.relative_entropy[1], 2.0)
self.assertEqual(alignment[:, 7], "TTTATTT")
self.assertAlmostEqual(relative_entropy[7], 1.4083272214176723)
def test_read_fasta(self):
    """Read the example FASTA file and check it through both alignment APIs.

    Quality/example.fasta (relative to the current directory) is parsed
    with AlignIO.read. The three records, the alignment length and the
    printed form are checked on the returned object, then the record
    checks are repeated on the new-style Alignment obtained from
    ``msa.alignment``, together with the consensus sequences of a motif
    built from that alignment.
    """
    # (description, sequence) expected for each row, in file order.
    expected_records = (
        ("EAS54_6_R1_2_1_413_324", "CCCTTCTTGTCTTCAGCGTTTCTCC"),
        ("EAS54_6_R1_2_1_540_792", "TTGGCAGGCCAAGGCCGATGGATCA"),
        ("EAS54_6_R1_2_1_443_348", "GTTGCTTCTGGCGTGGGTGGGGGGG"),
    )
    expected_msa_text = """\
Alignment with 3 rows and 25 columns
CCCTTCTTGTCTTCAGCGTTTCTCC EAS54_6_R1_2_1_413_324
TTGGCAGGCCAAGGCCGATGGATCA EAS54_6_R1_2_1_540_792
GTTGCTTCTGGCGTGGGTGGGGGGG EAS54_6_R1_2_1_443_348"""
    expected_alignment_text = """\
EAS54_6_R 0 CCCTTCTTGTCTTCAGCGTTTCTCC 25
EAS54_6_R 0 TTGGCAGGCCAAGGCCGATGGATCA 25
EAS54_6_R 0 GTTGCTTCTGGCGTGGGTGGGGGGG 25
"""

    fasta_path = os.path.join(os.curdir, "Quality", "example.fasta")
    msa = AlignIO.read(fasta_path, "fasta")
    self.assertEqual(len(msa), 3)
    for row, (description, letters) in enumerate(expected_records):
        record = msa[row]
        self.assertEqual(record.description, description)
        self.assertEqual(record.seq, letters)
    self.assertEqual(msa.get_alignment_length(), 25)
    self.assertEqual(str(msa), expected_msa_text)

    # Repeat the record checks on the new-style Alignment object.
    alignment = msa.alignment
    self.assertEqual(len(alignment), 3)
    for row, (description, letters) in enumerate(expected_records):
        record = alignment.sequences[row]
        self.assertEqual(record.description, description)
        self.assertEqual(record.seq, letters)
    self.assertEqual(alignment.length, 25)

    motif = motifs.Motif(alphabet="ACGT", alignment=alignment)
    self.assertEqual(motif.consensus, "CTCGCATCCCAAGCAGGATGGATCA")
    self.assertEqual(motif.degenerate_consensus, "BYBKYHKBBBVHKBVSSDKKKVKSV")
    self.assertEqual(str(alignment), expected_alignment_text)
    thresholded_consensus = motif.counts.calculate_consensus(identity=0.6)
    self.assertEqual(thresholded_consensus, "NTNGCNTNNNNNGNNGGNTGGNTCN")
if __name__ == "__main__":
    # When executed as a script, run every test in this module with
    # verbose (per-test) output.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))