Apply black (version 19.10b0) code style to the following test_m* files:

test_Mafft_tool.py
test_MafIO_index.py
test_MarkovModel.py
test_Medline.py
test_mmtf.py
test_mmtf_online.py
test_motifs.py
test_motifs_online.py
test_MSAProbs_tool.py
test_Muscle_tool.py
This commit is contained in:
svalqui
2020-02-21 15:11:54 +11:00
committed by Peter Cock
parent 8acd8c81a3
commit 07d430b5c9
10 changed files with 1328 additions and 662 deletions

View File

@ -31,11 +31,11 @@ except FileNotFoundError:
if not msaprobs_exe:
raise MissingExternalDependencyError(
"Install msaprobs if you want to use MSAProbs from Biopython.")
"Install msaprobs if you want to use MSAProbs from Biopython."
)
class MSAProbsTestCase(unittest.TestCase):
def setUp(self):
self.files_to_clean = set()
@ -57,11 +57,11 @@ class MSAProbsTestCase(unittest.TestCase):
"""Add a file for deferred removal by the tearDown routine."""
self.files_to_clean.add(filename)
#################################################################
class MSAProbsTestErrorConditions(MSAProbsTestCase):
def test_empty_file(self):
"""Test an empty file."""
input_file = "does_not_exist.fasta"
@ -70,9 +70,12 @@ class MSAProbsTestErrorConditions(MSAProbsTestCase):
try:
stdout, stderr = cline()
except ApplicationError as err:
self.assertTrue("Cannot open sequence file" in str(err) or
"Cannot open input file" in str(err) or
"Non-zero return code " in str(err), str(err))
self.assertTrue(
"Cannot open sequence file" in str(err)
or "Cannot open input file" in str(err)
or "Non-zero return code " in str(err),
str(err),
)
else:
self.fail("Should have failed, returned:\n%s\n%s" % (stdout, stderr))
@ -105,20 +108,19 @@ class MSAProbsTestErrorConditions(MSAProbsTestCase):
else:
self.fail("Should have failed, returned:\n%s\n%s" % (stdout, stderr))
#################################################################
class MSAProbsTestNormalConditions(MSAProbsTestCase):
def test_simple_fasta(self):
"""Test a simple fasta file."""
input_file = "Registry/seqs.fasta"
output_file = "temp_test.aln"
cline = MSAProbsCommandline(msaprobs_exe,
infile=input_file,
outfile=output_file,
clustalw=True)
cline = MSAProbsCommandline(
msaprobs_exe, infile=input_file, outfile=output_file, clustalw=True
)
self.standard_test_procedure(cline)
@ -141,10 +143,9 @@ class MSAProbsTestNormalConditions(MSAProbsTestCase):
SeqIO.write(SeqIO.parse("Phylip/hennigian.phy", "phylip"), handle, "fasta")
output_file = "temp_test.aln"
cline = MSAProbsCommandline(msaprobs_exe,
infile=input_file,
outfile=output_file,
clustalw=True)
cline = MSAProbsCommandline(
msaprobs_exe, infile=input_file, outfile=output_file, clustalw=True
)
self.add_file_to_clean(input_file)
self.standard_test_procedure(cline)
@ -154,10 +155,9 @@ class MSAProbsTestNormalConditions(MSAProbsTestCase):
input_file = "Registry/seqs.fasta"
output_file = "temp with spaces.aln"
cline = MSAProbsCommandline(msaprobs_exe,
infile=input_file,
outfile=output_file,
clustalw=True)
cline = MSAProbsCommandline(
msaprobs_exe, infile=input_file, outfile=output_file, clustalw=True
)
self.standard_test_procedure(cline)

View File

@ -28,16 +28,18 @@ class StaticMethodTest(unittest.TestCase):
"""Test static UCSC binning-related functions."""
def test_region2bin(self):
data = [(25079603, 25079787, {0, 1, 11, 96, 776}),
(25128173, 25128248, {0, 1, 11, 96, 776}),
(50312474, 50312703, {0, 1, 968, 14, 120}),
(41905591, 41906101, {0, 1, 904, 13, 112}),
(16670899, 16673060, {0, 1, 10, 712, 88}),
(75495356, 75495494, {0, 1, 2, 1160, 144, 17}),
(92259501, 92261053, {0, 1, 2, 1288, 160, 19}),
(83834063, 83838132, {0, 1, 2, 1224, 18, 152}),
(7309597, 7310411, {0, 1, 640, 79, 9}),
(6190410, 6190999, {0, 1, 632, 78, 9})]
data = [
(25079603, 25079787, {0, 1, 11, 96, 776}),
(25128173, 25128248, {0, 1, 11, 96, 776}),
(50312474, 50312703, {0, 1, 968, 14, 120}),
(41905591, 41906101, {0, 1, 904, 13, 112}),
(16670899, 16673060, {0, 1, 10, 712, 88}),
(75495356, 75495494, {0, 1, 2, 1160, 144, 17}),
(92259501, 92261053, {0, 1, 2, 1288, 160, 19}),
(83834063, 83838132, {0, 1, 2, 1224, 18, 152}),
(7309597, 7310411, {0, 1, 640, 79, 9}),
(6190410, 6190999, {0, 1, 632, 78, 9}),
]
for x, y, z in data:
self.assertEqual(MafIndex._region2bin(x, y), z)
@ -46,16 +48,18 @@ class StaticMethodTest(unittest.TestCase):
self.assertRaises(TypeError, MafIndex._region2bin, str(x), str(y))
def test_ucscbin(self):
data = [(25079603, 25079787, 776),
(25128173, 25128248, 776),
(50312474, 50312703, 968),
(41905591, 41906101, 904),
(16670899, 16673060, 712),
(75495356, 75495494, 1160),
(92259501, 92261053, 1288),
(83834063, 83838132, 1224),
(7309597, 7310411, 640),
(6190410, 6190999, 632)]
data = [
(25079603, 25079787, 776),
(25128173, 25128248, 776),
(50312474, 50312703, 968),
(41905591, 41906101, 904),
(16670899, 16673060, 712),
(75495356, 75495494, 1160),
(92259501, 92261053, 1288),
(83834063, 83838132, 1224),
(7309597, 7310411, 640),
(6190410, 6190999, 632),
]
for x, y, z in data:
self.assertEqual(MafIndex._ucscbin(x, y), z)
@ -65,62 +69,78 @@ class StaticMethodTest(unittest.TestCase):
if sqlite3:
class PreBuiltIndexTest(unittest.TestCase):
"""Test loading of prebuilt indices."""
def test_old(self):
idx = MafIndex("MAF/ucsc_mm9_chr10.mafindex",
"MAF/ucsc_mm9_chr10.maf", "mm9.chr10")
idx = MafIndex(
"MAF/ucsc_mm9_chr10.mafindex", "MAF/ucsc_mm9_chr10.maf", "mm9.chr10"
)
self.assertEqual(len(idx), 48)
def test_old_wrong_target_seqname(self):
self.assertRaises(ValueError,
MafIndex,
"MAF/ucsc_mm9_chr10.mafindex",
"MAF/ucsc_mm9_chr10.maf",
"mm9.chr11")
self.assertRaises(
ValueError,
MafIndex,
"MAF/ucsc_mm9_chr10.mafindex",
"MAF/ucsc_mm9_chr10.maf",
"mm9.chr11",
)
def test_old_wrong_filename(self):
self.assertRaises(ValueError,
MafIndex,
"MAF/ucsc_mm9_chr10.mafindex",
"MAF/humor.maf",
"mm9.chr10")
self.assertRaises(
ValueError,
MafIndex,
"MAF/ucsc_mm9_chr10.mafindex",
"MAF/humor.maf",
"mm9.chr10",
)
def test_old_file_not_found(self):
self.assertRaises(FileNotFoundError,
MafIndex,
"MAF/ucsc_mm9_chr11.mafindex",
"MAF/ucsc_mm9_chr11.maf",
"mm9.chr11")
self.assertRaises(
FileNotFoundError,
MafIndex,
"MAF/ucsc_mm9_chr11.mafindex",
"MAF/ucsc_mm9_chr11.maf",
"mm9.chr11",
)
def test_old_wrong_version(self):
self.assertRaises(ValueError,
MafIndex,
"MAF/wrong_version.idx",
"MAF/ucsc_mm9_chr10.maf",
"mm9.chr10")
self.assertRaises(
ValueError,
MafIndex,
"MAF/wrong_version.idx",
"MAF/ucsc_mm9_chr10.maf",
"mm9.chr10",
)
def test_old_unfinished_index(self):
self.assertRaises(ValueError,
MafIndex,
"MAF/unfinished.idx",
"MAF/ucsc_mm9_chr10.maf",
"mm9.chr10")
self.assertRaises(
ValueError,
MafIndex,
"MAF/unfinished.idx",
"MAF/ucsc_mm9_chr10.maf",
"mm9.chr10",
)
def test_old_corrupt_index(self):
self.assertRaises(ValueError,
MafIndex,
"MAF/corrupt.idx",
"MAF/ucsc_mm9_chr10.maf",
"mm9.chr10")
self.assertRaises(
ValueError,
MafIndex,
"MAF/corrupt.idx",
"MAF/ucsc_mm9_chr10.maf",
"mm9.chr10",
)
def test_old_invalid_sqlite(self):
self.assertRaises(ValueError,
MafIndex,
"MAF/invalid.idx",
"MAF/ucsc_mm9_chr10.maf",
"mm9.chr10")
self.assertRaises(
ValueError,
MafIndex,
"MAF/invalid.idx",
"MAF/ucsc_mm9_chr10.maf",
"mm9.chr10",
)
class NewIndexTest(unittest.TestCase):
"""Test creation of new indices."""
@ -142,53 +162,70 @@ if sqlite3:
self.assertEqual(len(idx), 983)
def test_bundle_without_target(self):
self.assertRaises(ValueError,
MafIndex,
self.tmpfile,
"MAF/bundle_without_target.maf",
"mm9.chr10")
self.assertRaises(
ValueError,
MafIndex,
self.tmpfile,
"MAF/bundle_without_target.maf",
"mm9.chr10",
)
def test_length_coords_mismatch(self):
self.assertRaises(ValueError,
MafIndex,
self.tmpfile,
"MAF/length_coords_mismatch.maf",
"mm9.chr10")
self.assertRaises(
ValueError,
MafIndex,
self.tmpfile,
"MAF/length_coords_mismatch.maf",
"mm9.chr10",
)
class TestGetRecord(unittest.TestCase):
"""Make sure we can seek and fetch records properly."""
def setUp(self):
self.idx = MafIndex("MAF/ucsc_mm9_chr10.mafindex",
"MAF/ucsc_mm9_chr10.maf", "mm9.chr10")
self.idx = MafIndex(
"MAF/ucsc_mm9_chr10.mafindex", "MAF/ucsc_mm9_chr10.maf", "mm9.chr10"
)
self.assertEqual(len(self.idx), 48)
def test_records_begin(self):
recs = {}
recs[0] = SeqRecord(Seq("TCATAGGTATTTATTTTTAAATATGGTTTGCTTTATGGCTAGAA"
"CACACCGATTACTTAAAATAGGATTAACC--CCCATACACTTTA"
"AAAATGATTAAACAACATTTCTGCTGCTCGCTCACATTCTTCAT"
"AGAAGATGACATAATGTATTTTCCTTTTGGTT"),
id="mm9.chr10",
name="mm9.chr10",
description="",
annotations={"start": 3009319,
"srcSize": 129993255,
"strand": 1,
"size": 162})
recs[0] = SeqRecord(
Seq(
"TCATAGGTATTTATTTTTAAATATGGTTTGCTTTATGGCTAGAA"
"CACACCGATTACTTAAAATAGGATTAACC--CCCATACACTTTA"
"AAAATGATTAAACAACATTTCTGCTGCTCGCTCACATTCTTCAT"
"AGAAGATGACATAATGTATTTTCCTTTTGGTT"
),
id="mm9.chr10",
name="mm9.chr10",
description="",
annotations={
"start": 3009319,
"srcSize": 129993255,
"strand": 1,
"size": 162,
},
)
recs[1] = SeqRecord(Seq("TCACAGATATTTACTATTAAATATGGTTTGTTATATGGTTACGG"
"TTCATAGGTTACTTGGAATTGGATTAACCTTCTTATTCATTGCA"
"GAATTGGTTACACTGTGTTCTTGACCTTTGCTTGTTTTCTCCAT"
"GGAAACTGATGTCAAATACTTTCCCTTTGGTT"),
id="oryCun1.scaffold_133159",
name="oryCun1.scaffold_133159",
description="",
annotations={"start": 11087,
"srcSize": 13221,
"strand": 1,
"size": 164})
recs[1] = SeqRecord(
Seq(
"TCACAGATATTTACTATTAAATATGGTTTGTTATATGGTTACGG"
"TTCATAGGTTACTTGGAATTGGATTAACCTTCTTATTCATTGCA"
"GAATTGGTTACACTGTGTTCTTGACCTTTGCTTGTTTTCTCCAT"
"GGAAACTGATGTCAAATACTTTCCCTTTGGTT"
),
id="oryCun1.scaffold_133159",
name="oryCun1.scaffold_133159",
description="",
annotations={
"start": 11087,
"srcSize": 13221,
"strand": 1,
"size": 164,
},
)
fetched_recs = self.idx._get_record(34)
@ -198,59 +235,83 @@ if sqlite3:
def test_records_end(self):
recs = {}
recs[0] = SeqRecord(Seq("TGTTTAGTACC----ATGCTTAGGAATGATAAACTCACTTAGTGtt"),
id="mm9.chr10",
name="mm9.chr10",
description="",
annotations={"start": 3021494,
"srcSize": 129993255,
"strand": 1,
"size": 42})
recs[0] = SeqRecord(
Seq("TGTTTAGTACC----ATGCTTAGGAATGATAAACTCACTTAGTGtt"),
id="mm9.chr10",
name="mm9.chr10",
description="",
annotations={
"start": 3021494,
"srcSize": 129993255,
"strand": 1,
"size": 42,
},
)
recs[1] = SeqRecord(Seq("TGTTGCATGTCCTTTATTCTTTGGCGTGATAGGCTCACCCAATCTT"),
id="ponAbe2.chr6",
name="ponAbe2.chr6",
description="",
annotations={"start": 16173516,
"srcSize": 174210431,
"strand": -1,
"size": 46})
recs[1] = SeqRecord(
Seq("TGTTGCATGTCCTTTATTCTTTGGCGTGATAGGCTCACCCAATCTT"),
id="ponAbe2.chr6",
name="ponAbe2.chr6",
description="",
annotations={
"start": 16173516,
"srcSize": 174210431,
"strand": -1,
"size": 46,
},
)
recs[2] = SeqRecord(Seq("TGTTGCATATCCTTTATTCTTTGGCGTGATAGGCTCACCCAATCTT"),
id="panTro2.chr6",
name="panTro2.chr6",
description="",
annotations={"start": 16393864,
"srcSize": 173908612,
"strand": -1,
"size": 46})
recs[2] = SeqRecord(
Seq("TGTTGCATATCCTTTATTCTTTGGCGTGATAGGCTCACCCAATCTT"),
id="panTro2.chr6",
name="panTro2.chr6",
description="",
annotations={
"start": 16393864,
"srcSize": 173908612,
"strand": -1,
"size": 46,
},
)
recs[3] = SeqRecord(Seq("TGTTGCATGTCGTTTATTCTTTGGCGTGATAGGCTCACCCAATCTT"),
id="hg18.chr6",
name="hg18.chr6",
description="",
annotations={"start": 15875298,
"srcSize": 170899992,
"strand": -1,
"size": 46})
recs[3] = SeqRecord(
Seq("TGTTGCATGTCGTTTATTCTTTGGCGTGATAGGCTCACCCAATCTT"),
id="hg18.chr6",
name="hg18.chr6",
description="",
annotations={
"start": 15875298,
"srcSize": 170899992,
"strand": -1,
"size": 46,
},
)
recs[4] = SeqRecord(Seq("TGTTAAGTCTCACTTGCTGTTCAAAGTGATAGCTTCACTCCATCAT"),
id="canFam2.chr1",
name="canFam2.chr1",
description="",
annotations={"start": 78072287,
"srcSize": 125616256,
"strand": -1,
"size": 46})
recs[4] = SeqRecord(
Seq("TGTTAAGTCTCACTTGCTGTTCAAAGTGATAGCTTCACTCCATCAT"),
id="canFam2.chr1",
name="canFam2.chr1",
description="",
annotations={
"start": 78072287,
"srcSize": 125616256,
"strand": -1,
"size": 46,
},
)
recs[5] = SeqRecord(Seq("TGTTTAAAATG----ATTGCTAGAACTTCTA--CTCACTGGA----"),
id="ornAna1.chr2",
name="ornAna1.chr2",
description="",
annotations={"start": 14757144,
"srcSize": 54797317,
"strand": -1,
"size": 36})
recs[5] = SeqRecord(
Seq("TGTTTAAAATG----ATTGCTAGAACTTCTA--CTCACTGGA----"),
id="ornAna1.chr2",
name="ornAna1.chr2",
description="",
annotations={
"start": 14757144,
"srcSize": 54797317,
"strand": -1,
"size": 36,
},
)
fetched_recs = self.idx._get_record(99228)
@ -261,8 +322,9 @@ if sqlite3:
"""Test index searching on a properly-formatted MAF."""
def setUp(self):
self.idx = MafIndex("MAF/ucsc_mm9_chr10.mafindex",
"MAF/ucsc_mm9_chr10.maf", "mm9.chr10")
self.idx = MafIndex(
"MAF/ucsc_mm9_chr10.mafindex", "MAF/ucsc_mm9_chr10.maf", "mm9.chr10"
)
self.assertEqual(len(self.idx), 48)
def test_invalid_type_1(self):
@ -288,11 +350,12 @@ if sqlite3:
self.assertEqual(len(results), 4 + 4)
self.assertEqual({len(x) for x in results},
{4, 1, 9, 10, 4, 3, 5, 1})
self.assertEqual({len(x) for x in results}, {4, 1, 9, 10, 4, 3, 5, 1})
# Code formatting note:
# Expected start coordinates are grouped by alignment blocks
# Turn black code style off
# fmt: off
self.assertEqual(
{x.annotations["start"] for y in results for x in y},
{
@ -302,7 +365,11 @@ if sqlite3:
3014842, 1371, 7842, 171548, 16169512, 16389874, 15871306, 6404, 184317, 14750994,
3018161, 16390178, 15871611, 16169818,
3018230, 15871676, 16390243,
3018359, 16390338, 15871771, 184712, 16169976, 3018482})
3018359, 16390338, 15871771, 184712, 16169976, 3018482
}
)
# Turn black code style on
# fmt: on
def test_correct_retrieval_2(self):
search = self.idx.search((3009319, 3021421), (3012566, 3021536))
@ -310,11 +377,12 @@ if sqlite3:
self.assertEqual(len(results), 6)
self.assertEqual({len(x) for x in results},
{2, 4, 5, 14, 7, 6})
self.assertEqual({len(x) for x in results}, {2, 4, 5, 14, 7, 6})
# Code formatting note:
# Expected start coordinates are grouped by alignment blocks
# Turn black code style off
# fmt: off
self.assertEqual(
{x.annotations["start"] for y in results for x in y},
{
@ -323,28 +391,39 @@ if sqlite3:
3012441, 15860899, 16379447, 16160646, 180525,
3021421, 9910, 996, 16173434, 16393782, 15875216, 11047, 175213, 3552, 677, 78072203, 3590, 95587, 14757054,
3021465, 9957, 16173483, 16393831, 15875265, 78072243, 14757099,
3021494, 16173516, 16393864, 15875298, 78072287, 14757144})
3021494, 16173516, 16393864, 15875298, 78072287, 14757144
}
)
# Turn black code style on
# fmt: on
def test_correct_retrieval_3(self):
"""Following issue 1083.
https://github.com/biopython/biopython/issues/1083
"""
search = self.idx.search((3012076, 3012076 + 300), (3012076 + 100, 3012076 + 400))
search = self.idx.search(
(3012076, 3012076 + 300), (3012076 + 100, 3012076 + 400)
)
results = list(search)
self.assertEqual(len(results), 2)
self.assertEqual({len(x) for x in results},
{4, 5})
self.assertEqual({len(x) for x in results}, {4, 5})
# Code formatting note:
# Expected start coordinates are grouped by alignment blocks
# Turn black code style off
# fmt: off
self.assertEqual(
{x.annotations["start"] for y in results for x in y},
{
3012076, 16160203, 16379004, 15860456,
3012441, 15860899, 16379447, 16160646, 180525})
3012441, 15860899, 16379447, 16160646, 180525
}
)
# Turn black code style on
# fmt: on
def test_correct_block_boundary(self):
"""Following issues 504 and 1086.
@ -446,7 +525,8 @@ if sqlite3:
"panTro2.chr6": 53,
"calJac1.Contig6394": 53,
"otoGar1.scaffold_334.1-359464": 52,
"loxAfr1.scaffold_75566": 54}
"loxAfr1.scaffold_75566": 54,
}
for seq_id, length in correct_lengths.items():
self.assertEqual(len(seq_dict[seq_id].ungap("-")), length)
@ -483,7 +563,8 @@ if sqlite3:
"panTro2.chr6": "GGGATCATAAACCATTTAATCTGTGAAATATCTAATCTTTTGGGAAATAGTGG",
"calJac1.Contig6394": "GGGATCATAAGCCATTTAATCTGTGAAATGTGAAATCTTTTGGGAAACAGTGG",
"otoGar1.scaffold_334.1-359464": "GGAAGCATAAACTTTTAATCTATGAAATATCAAATCACTTGGGCAATAGCTG",
"loxAfr1.scaffold_75566": "GGGAGTATAAACCATTTAGTCTGCGAAATGCCAAATCTTCAGGGGAAAAAGCTG"}
"loxAfr1.scaffold_75566": "GGGAGTATAAACCATTTAGTCTGCGAAATGCCAAATCTTCAGGGGAAAAAGCTG",
}
for seq_id, sequence in correct_sequences.items():
self.assertEqual(seq_dict[seq_id].ungap("-"), sequence)
@ -537,7 +618,8 @@ if sqlite3:
"panTro2.chr6": "CCTATACCTTTCTTTTATGAGAATTTTGTTTTAATCCTAAACTTTTGGGATCATAAACCATTTAATCTGTGAAATATCTAATCTTTTGGGAAATAGTGG",
"calJac1.Contig6394": "CCTATACCTTTCTTTCATGAGAATTTTGTTTGAATCCTAAACTTTTGGGATCATAAGCCATTTAATCTGTGAAATGTGAAATCTTTTGGGAAACAGTGG",
"otoGar1.scaffold_334.1-359464": "GGAAGCATAAACTTTTAATCTATGAAATATCAAATCACTTGGGCAATAGCTG",
"loxAfr1.scaffold_75566": "TTTGGTTAGAATTATGCTTTAATTCAAAACTTCCGGGAGTATAAACCATTTAGTCTGCGAAATGCCAAATCTTCAGGGGAAAAAGCTG"}
"loxAfr1.scaffold_75566": "TTTGGTTAGAATTATGCTTTAATTCAAAACTTCCGGGAGTATAAACCATTTAGTCTGCGAAATGCCAAATCTTCAGGGGAAAAAGCTG",
}
for seq_id, sequence in correct_sequences.items():
self.assertEqual(seq_dict[seq_id].ungap("-"), sequence)
@ -545,8 +627,11 @@ if sqlite3:
"""Test index searching on an incorrectly-formatted MAF."""
def setUp(self):
self.idx = MafIndex("MAF/ucsc_mm9_chr10_bad.mafindex",
"MAF/ucsc_mm9_chr10_bad.maf", "mm9.chr10")
self.idx = MafIndex(
"MAF/ucsc_mm9_chr10_bad.mafindex",
"MAF/ucsc_mm9_chr10_bad.maf",
"mm9.chr10",
)
self.assertEqual(len(self.idx), 48)
def test_incorrect_bundle_coords(self):
@ -557,14 +642,17 @@ if sqlite3:
"""Test in silico splicing on a correctly-formatted MAF."""
def setUp(self):
self.idx = MafIndex("MAF/ucsc_mm9_chr10_big.mafindex",
"MAF/ucsc_mm9_chr10_big.maf", "mm9.chr10")
self.idx = MafIndex(
"MAF/ucsc_mm9_chr10_big.mafindex",
"MAF/ucsc_mm9_chr10_big.maf",
"mm9.chr10",
)
self.assertEqual(len(self.idx), 983)
def test_invalid_strand(self):
self.assertRaises(ValueError,
self.idx.get_spliced,
(0, 1000), (500, 1500), ".")
self.assertRaises(
ValueError, self.idx.get_spliced, (0, 1000), (500, 1500), "."
)
def test_no_alignment(self):
result = self.idx.get_spliced((0, 1000), (500, 1500), 1)
@ -580,18 +668,44 @@ if sqlite3:
an actual gene (Cnksr3) in mouse. It should perfectly match the
spliced transcript pulled independently from UCSC.
"""
result = self.idx.get_spliced((3134303, 3185733, 3192055, 3193589,
3203538, 3206102, 3208126, 3211424,
3211872, 3217393, 3219697, 3220356,
3225954),
(3134909, 3185897, 3192258, 3193677,
3203580, 3206222, 3208186, 3211493,
3212019, 3217518, 3219906, 3220446,
3227479), 1)
result = self.idx.get_spliced(
(
3134303,
3185733,
3192055,
3193589,
3203538,
3206102,
3208126,
3211424,
3211872,
3217393,
3219697,
3220356,
3225954,
),
(
3134909,
3185897,
3192258,
3193677,
3203580,
3206222,
3208186,
3211493,
3212019,
3217518,
3219906,
3220446,
3227479,
),
1,
)
cnksr3 = str(SeqIO.read("MAF/cnksr3.fa", "fasta").seq).upper()
mm9_seq = "".join([str(x.seq) for x in result
if x.id.startswith("mm9")]).replace("-", "")
mm9_seq = "".join(
[str(x.seq) for x in result if x.id.startswith("mm9")]
).replace("-", "")
self.assertEqual(mm9_seq, cnksr3)
@ -599,19 +713,23 @@ if sqlite3:
"""Test in silico splicing on an incorrectly-formatted MAF."""
def setUp(self):
self.idx = MafIndex("MAF/ucsc_mm9_chr10_bad.mafindex",
"MAF/ucsc_mm9_chr10_bad.maf", "mm9.chr10")
self.idx = MafIndex(
"MAF/ucsc_mm9_chr10_bad.mafindex",
"MAF/ucsc_mm9_chr10_bad.maf",
"mm9.chr10",
)
self.assertEqual(len(self.idx), 48)
def test_inconsistent_strand(self):
self.assertRaises(ValueError,
self.idx.get_spliced,
(0, 3021421), (1000, 3022000), 1)
self.assertRaises(
ValueError, self.idx.get_spliced, (0, 3021421), (1000, 3022000), 1
)
def test_bundle_without_target(self):
self.assertRaises(ValueError,
self.idx.get_spliced,
(3009319,), (3009900,), 1)
self.assertRaises(
ValueError, self.idx.get_spliced, (3009319,), (3009900,), 1
)
if __name__ == "__main__":
runner = unittest.TextTestRunner(verbosity=2)

View File

@ -15,32 +15,41 @@ os.environ["LANG"] = "C"
mafft_exe = None
if sys.platform == "win32":
raise MissingExternalDependencyError("Testing with MAFFT not implemented on Windows yet")
raise MissingExternalDependencyError(
"Testing with MAFFT not implemented on Windows yet"
)
else:
from subprocess import getoutput
output = getoutput("mafft -help")
if "not found" not in output and "not recognized" not in output:
if "MAFFT" in output:
mafft_exe = "mafft"
if not mafft_exe:
raise MissingExternalDependencyError(
"Install MAFFT if you want to use the Bio.Align.Applications wrapper.")
"Install MAFFT if you want to use the Bio.Align.Applications wrapper."
)
def check_mafft_version(mafft_exe):
child = subprocess.Popen("%s --help" % mafft_exe,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=True,
shell=(sys.platform != "win32"))
child = subprocess.Popen(
"%s --help" % mafft_exe,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=True,
shell=(sys.platform != "win32"),
)
stdoutdata, stderrdata = child.communicate()
output = stdoutdata + "\n" + stderrdata
return_code = child.returncode
del child
if "correctly installed?" in output \
or "mafft binaries have to be installed" in output:
if (
"correctly installed?" in output
or "mafft binaries have to be installed" in output
):
raise MissingExternalDependencyError(
"MAFFT does not seem to be correctly installed.")
"MAFFT does not seem to be correctly installed."
)
# e.g. "MAFFT version 5.732 (2005/09/14)\n"
# e.g. " MAFFT v6.717b (2009/12/03)\n"
@ -48,11 +57,12 @@ def check_mafft_version(mafft_exe):
index = output.find(marker)
if index == -1:
continue
version = output[index + len(marker):].strip().split(None, 1)[0]
version = output[index + len(marker) :].strip().split(None, 1)[0]
major = int(version.split(".", 1)[0])
if major < 6:
raise MissingExternalDependencyError("Test requires MAFFT v6 or "
"later (found %s)." % version)
raise MissingExternalDependencyError(
"Test requires MAFFT v6 or later (found %s)." % version
)
return (major, version)
raise MissingExternalDependencyError("Couldn't determine MAFFT version.")
@ -62,7 +72,6 @@ version_major, version_string = check_mafft_version(mafft_exe)
class MafftApplication(unittest.TestCase):
def setUp(self):
self.infile1 = "Fasta/f002"
@ -79,8 +88,11 @@ class MafftApplication(unittest.TestCase):
self.assertTrue(stdoutdata.startswith(">gi|1348912|gb|G26680|G26680"))
# Used to get "Progressive alignment ..." but in v7.245
# became "Progressive alignment 1/2..." and "Progressive alignment 2/2..."
self.assertTrue(("Progressive alignment ..." in stderrdata) or
("Progressive alignment 1/" in stderrdata), stderrdata)
self.assertTrue(
("Progressive alignment ..." in stderrdata)
or ("Progressive alignment 1/" in stderrdata),
stderrdata,
)
self.assertNotIn("$#=0", stderrdata)
def test_Mafft_with_options(self):
@ -108,34 +120,38 @@ class MafftApplication(unittest.TestCase):
self.assertNotIn("$#=0", stderrdata)
if version_major >= 7:
def test_Mafft_with_PHYLIP_output(self):
"""Simple round-trip through app with PHYLIP output."""
cmdline = MafftCommandline(mafft_exe, input=self.infile1,
phylipout=True)
cmdline = MafftCommandline(mafft_exe, input=self.infile1, phylipout=True)
self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
stdoutdata, stderrdata = cmdline()
# e.g. " 3 706\n" or " 3 681" but allow some variation in the column count
self.assertTrue(stdoutdata.startswith(" 3 68") or
stdoutdata.startswith(" 3 69") or
stdoutdata.startswith(" 3 70"), stdoutdata)
self.assertIn("gi|1348912 ", stdoutdata,
stdoutdata)
self.assertNotIn("gi|1348912|gb|G26680|G26680", stdoutdata,
stdoutdata)
self.assertTrue(
stdoutdata.startswith(" 3 68")
or stdoutdata.startswith(" 3 69")
or stdoutdata.startswith(" 3 70"),
stdoutdata,
)
self.assertIn("gi|1348912 ", stdoutdata, stdoutdata)
self.assertNotIn("gi|1348912|gb|G26680|G26680", stdoutdata, stdoutdata)
self.assertNotIn("$#=0", stderrdata)
def test_Mafft_with_PHYLIP_namelength(self):
"""Check PHYLIP with --namelength."""
cmdline = MafftCommandline(mafft_exe, input=self.infile1,
phylipout=True, namelength=50)
cmdline = MafftCommandline(
mafft_exe, input=self.infile1, phylipout=True, namelength=50
)
self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
stdoutdata, stderrdata = cmdline()
# e.g. " 3 706\n" or " 3 681" but allow some variation in the column count
self.assertTrue(stdoutdata.startswith(" 3 68") or
stdoutdata.startswith(" 3 69") or
stdoutdata.startswith(" 3 70"), stdoutdata)
self.assertIn("gi|1348912|gb|G26680|G26680", stdoutdata,
stdoutdata)
self.assertTrue(
stdoutdata.startswith(" 3 68")
or stdoutdata.startswith(" 3 69")
or stdoutdata.startswith(" 3 70"),
stdoutdata,
)
self.assertIn("gi|1348912|gb|G26680|G26680", stdoutdata, stdoutdata)
self.assertNotIn("$#=0", stderrdata)
def test_Mafft_with_complex_command_line(self):
@ -155,11 +171,14 @@ class MafftApplication(unittest.TestCase):
cmdline.set_parameter("--treeout", True)
cmdline.set_parameter("nuc", True)
self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
self.assertEqual(str(cmdline), mafft_exe +
" --localpair --weighti 4.2 --retree 5 " +
"--maxiterate 200 --nofft --op 2.04 --ep 0.51" +
" --lop 0.233 --lep 0.2 --reorder --treeout" +
" --nuc Fasta/f002")
self.assertEqual(
str(cmdline),
mafft_exe
+ " --localpair --weighti 4.2 --retree 5 "
+ "--maxiterate 200 --nofft --op 2.04 --ep 0.51"
+ " --lop 0.233 --lep 0.2 --reorder --treeout"
+ " --nuc Fasta/f002",
)
stdoutdata, stderrdata = cmdline()
self.assertTrue(stdoutdata.startswith(">gi|1348912|gb|G26680|G26680"))
self.assertNotIn("$#=0", stderrdata)

View File

@ -24,8 +24,10 @@ try:
from numpy import log
except ImportError:
from Bio import MissingPythonDependencyError
raise MissingPythonDependencyError(
"Install NumPy if you want to use Bio.MarkovModel.") from None
"Install NumPy if you want to use Bio.MarkovModel."
) from None
with warnings.catch_warnings():
# Silence this warning:
@ -35,7 +37,6 @@ with warnings.catch_warnings():
class TestMarkovModel(unittest.TestCase):
def test_train_visible(self):
states = ["0", "1", "2", "3"]
alphabet = ["A", "C", "G", "T"]
@ -84,78 +85,55 @@ class TestMarkovModel(unittest.TestCase):
self.assertEqual(len(markov_model.p_emission[1]), 4)
self.assertEqual(len(markov_model.p_emission[2]), 4)
self.assertEqual(len(markov_model.p_emission[3]), 4)
self.assertAlmostEqual(markov_model.p_emission[0][0], 0.666667,
places=4)
self.assertAlmostEqual(markov_model.p_emission[0][1], 0.111111,
places=4)
self.assertAlmostEqual(markov_model.p_emission[0][2], 0.111111,
places=4)
self.assertAlmostEqual(markov_model.p_emission[0][3], 0.111111,
places=4)
self.assertAlmostEqual(markov_model.p_emission[1][0], 0.083333,
places=4)
self.assertAlmostEqual(markov_model.p_emission[1][1], 0.750000,
places=4)
self.assertAlmostEqual(markov_model.p_emission[1][2], 0.083333,
places=4)
self.assertAlmostEqual(markov_model.p_emission[1][3], 0.083333,
places=4)
self.assertAlmostEqual(markov_model.p_emission[2][0], 0.083333,
places=4)
self.assertAlmostEqual(markov_model.p_emission[2][1], 0.083333,
places=4)
self.assertAlmostEqual(markov_model.p_emission[2][2], 0.750000,
places=4)
self.assertAlmostEqual(markov_model.p_emission[2][3], 0.083333,
places=4)
self.assertAlmostEqual(markov_model.p_emission[3][0], 0.031250,
places=4)
self.assertAlmostEqual(markov_model.p_emission[3][1], 0.031250,
places=4)
self.assertAlmostEqual(markov_model.p_emission[3][2], 0.031250,
places=4)
self.assertAlmostEqual(markov_model.p_emission[3][3], 0.906250,
places=4)
self.assertAlmostEqual(markov_model.p_emission[0][0], 0.666667, places=4)
self.assertAlmostEqual(markov_model.p_emission[0][1], 0.111111, places=4)
self.assertAlmostEqual(markov_model.p_emission[0][2], 0.111111, places=4)
self.assertAlmostEqual(markov_model.p_emission[0][3], 0.111111, places=4)
self.assertAlmostEqual(markov_model.p_emission[1][0], 0.083333, places=4)
self.assertAlmostEqual(markov_model.p_emission[1][1], 0.750000, places=4)
self.assertAlmostEqual(markov_model.p_emission[1][2], 0.083333, places=4)
self.assertAlmostEqual(markov_model.p_emission[1][3], 0.083333, places=4)
self.assertAlmostEqual(markov_model.p_emission[2][0], 0.083333, places=4)
self.assertAlmostEqual(markov_model.p_emission[2][1], 0.083333, places=4)
self.assertAlmostEqual(markov_model.p_emission[2][2], 0.750000, places=4)
self.assertAlmostEqual(markov_model.p_emission[2][3], 0.083333, places=4)
self.assertAlmostEqual(markov_model.p_emission[3][0], 0.031250, places=4)
self.assertAlmostEqual(markov_model.p_emission[3][1], 0.031250, places=4)
self.assertAlmostEqual(markov_model.p_emission[3][2], 0.031250, places=4)
self.assertAlmostEqual(markov_model.p_emission[3][3], 0.906250, places=4)
def test_baum_welch(self):
states = ["CP", "IP"]
alphabet = ["cola", "ice_t", "lem"]
outputs = [
(2, 1, 0)
]
outputs = [(2, 1, 0)]
p_initial = [1.0, 0.0000001]
p_transition = [[0.7, 0.3],
[0.5, 0.5]]
p_emission = [[0.6, 0.1, 0.3],
[0.1, 0.7, 0.2]]
p_transition = [[0.7, 0.3], [0.5, 0.5]]
p_emission = [[0.6, 0.1, 0.3], [0.1, 0.7, 0.2]]
N, M = len(states), len(alphabet)
x = MarkovModel._baum_welch(N, M, outputs,
p_initial=p_initial,
p_transition=p_transition,
p_emission=p_emission
)
x = MarkovModel._baum_welch(
N,
M,
outputs,
p_initial=p_initial,
p_transition=p_transition,
p_emission=p_emission,
)
p_initial, p_transition, p_emission = x
markov_model = MarkovModel.MarkovModel(states, alphabet,
p_initial, p_transition,
p_emission)
markov_model = MarkovModel.MarkovModel(
states, alphabet, p_initial, p_transition, p_emission
)
self.assertEqual(markov_model.states, ["CP", "IP"])
self.assertEqual(markov_model.alphabet, ["cola", "ice_t", "lem"])
self.assertEqual(len(markov_model.p_initial), 2)
self.assertAlmostEqual(markov_model.p_initial[0], 1.0,
places=4)
self.assertAlmostEqual(markov_model.p_initial[1], 0.0,
places=4)
self.assertAlmostEqual(markov_model.p_initial[0], 1.0, places=4)
self.assertAlmostEqual(markov_model.p_initial[1], 0.0, places=4)
self.assertEqual(len(markov_model.p_transition), 2)
self.assertEqual(len(markov_model.p_transition[0]), 2)
self.assertEqual(len(markov_model.p_transition[1]), 2)
self.assertAlmostEqual(markov_model.p_transition[0][0], 0.02460365,
places=4)
self.assertAlmostEqual(markov_model.p_transition[0][1], 0.97539634,
places=4)
self.assertAlmostEqual(markov_model.p_transition[1][0], 1.0,
places=4)
self.assertAlmostEqual(markov_model.p_transition[1][1], 0.0,
places=4)
self.assertAlmostEqual(markov_model.p_transition[0][0], 0.02460365, places=4)
self.assertAlmostEqual(markov_model.p_transition[0][1], 0.97539634, places=4)
self.assertAlmostEqual(markov_model.p_transition[1][0], 1.0, places=4)
self.assertAlmostEqual(markov_model.p_transition[1][1], 0.0, places=4)
self.assertEqual(len(markov_model.p_emission), 2)
self.assertEqual(len(markov_model.p_emission[0]), 3)
self.assertEqual(len(markov_model.p_emission[1]), 3)
@ -173,12 +151,11 @@ class TestMarkovModel(unittest.TestCase):
states = "NR"
alphabet = "AGTC"
p_initial = array([1.0, 0.0])
p_transition = array([[0.90, 0.10],
[0.20, 0.80]])
p_emission = array([[0.30, 0.20, 0.30, 0.20],
[0.10, 0.40, 0.10, 0.40]])
p_transition = array([[0.90, 0.10], [0.20, 0.80]])
p_emission = array([[0.30, 0.20, 0.30, 0.20], [0.10, 0.40, 0.10, 0.40]])
markov_model = MarkovModel.MarkovModel(
states, alphabet, p_initial, p_transition, p_emission)
states, alphabet, p_initial, p_transition, p_emission
)
states = MarkovModel.find_states(markov_model, "TGCC")
self.assertEqual(len(states), 1)
state_list, state_float = states[0]
@ -189,60 +166,90 @@ class TestMarkovModel(unittest.TestCase):
states = "NR"
alphabet = "AGTC"
p_initial = array([1.0, 0.0])
p_transition = array([[0.56, 0.44],
[0.25, 0.75]])
p_emission = array([[0.04, 0.14, 0.62, 0.20],
[0.39, 0.15, 0.04, 0.42]])
p_transition = array([[0.56, 0.44], [0.25, 0.75]])
p_emission = array([[0.04, 0.14, 0.62, 0.20], [0.39, 0.15, 0.04, 0.42]])
markov_model = MarkovModel.MarkovModel(
states, alphabet, p_initial, p_transition, p_emission)
states, alphabet, p_initial, p_transition, p_emission
)
states = MarkovModel.find_states(markov_model, "CCTGAGTTAGTCGT")
self.assertEqual(len(states), 1)
state_list, state_float = states[0]
self.assertEqual(state_list, ["N", "N", "N", "R", "R", "R", "N", "N", "R", "R", "N", "R", "R", "N"])
self.assertEqual(
state_list,
["N", "N", "N", "R", "R", "R", "N", "N", "R", "R", "N", "R", "R", "N"],
)
def test_topcoder3(self):
    """Decode the third topcoder example and check the single best state path."""
    # Expected path: NRRRRRRRRRRRNNNNRRRRRRRRR
    states = "NR"
    alphabet = "AGTC"
    p_initial = array([1.0, 0.0])
    p_transition = array([[0.75, 0.25], [0.25, 0.75]])
    p_emission = array([[0.45, 0.36, 0.06, 0.13], [0.24, 0.18, 0.12, 0.46]])
    markov_model = MarkovModel.MarkovModel(
        states, alphabet, p_initial, p_transition, p_emission
    )
    # Use a separate name for the decoding result so the "NR" state string
    # above is not rebound to a list of (path, score) pairs.
    decoded = MarkovModel.find_states(markov_model, "CCGTACTTACCCAGGACCGCAGTCC")
    self.assertEqual(len(decoded), 1)
    state_list, _ = decoded[0]
    # One state per observed symbol: N, 11 x R, 4 x N, 9 x R (25 in total).
    self.assertEqual(state_list, list("N" + "R" * 11 + "N" * 4 + "R" * 9))
def test_topcoder4(self):
    """Decode the fourth topcoder example and check the single best state path."""
    # Expected path: NRRRRRRRRRR
    states = "NR"
    alphabet = "AGTC"
    p_initial = array([1.0, 0.0])
    p_transition = array([[0.55, 0.45], [0.15, 0.85]])
    p_emission = array([[0.75, 0.03, 0.01, 0.21], [0.34, 0.11, 0.39, 0.16]])
    markov_model = MarkovModel.MarkovModel(
        states, alphabet, p_initial, p_transition, p_emission
    )
    # Use a separate name for the decoding result so the "NR" state string
    # above is not rebound to a list of (path, score) pairs.
    decoded = MarkovModel.find_states(markov_model, "TTAGCAGTGCG")
    self.assertEqual(len(decoded), 1)
    state_list, _ = decoded[0]
    # One state per observed symbol: N followed by 10 x R (11 in total).
    self.assertEqual(state_list, list("N" + "R" * 10))
def test_topcoder5(self):
# N
states = "NR"
alphabet = "AGTC"
p_initial = array([1.0, 0.0])
p_transition = array([[0.84, 0.16],
[0.25, 0.75]])
p_emission = array([[0.26, 0.37, 0.08, 0.29],
[0.31, 0.13, 0.33, 0.23]])
p_transition = array([[0.84, 0.16], [0.25, 0.75]])
p_emission = array([[0.26, 0.37, 0.08, 0.29], [0.31, 0.13, 0.33, 0.23]])
markov_model = MarkovModel.MarkovModel(
states, alphabet, p_initial, p_transition, p_emission)
states, alphabet, p_initial, p_transition, p_emission
)
states = MarkovModel.find_states(markov_model, "T")
self.assertEqual(len(states), 1)
state_list, state_float = states[0]
@ -256,22 +263,17 @@ class TestMarkovModel(unittest.TestCase):
line = "This is a \n string with two lines \n"
handle = StringIO(line)
start = "This is a \n"
self.assertEqual(
start, MarkovModel._readline_and_check_start(handle, start))
self.assertEqual(start, MarkovModel._readline_and_check_start(handle, start))
def test_save_and_load(self):
states = "NR"
alphabet = "AGTC"
p_initial = array([1.0, 0.0])
p_transition = array([[0.75, 0.25], [0.25, 0.75]])
p_emission = array(
[[0.45, 0.36, 0.06, 0.13], [0.24, 0.18, 0.12, 0.46]])
p_emission = array([[0.45, 0.36, 0.06, 0.13], [0.24, 0.18, 0.12, 0.46]])
markov_model_save = MarkovModel.MarkovModel(
states,
alphabet,
p_initial,
p_transition,
p_emission)
states, alphabet, p_initial, p_transition, p_emission
)
handle = StringIO()
MarkovModel.save(markov_model_save, handle)
@ -281,54 +283,59 @@ class TestMarkovModel(unittest.TestCase):
self.assertEqual("".join(markov_model_load.states), states)
self.assertEqual("".join(markov_model_load.alphabet), alphabet)
self.assertTrue(array_equal(markov_model_load.p_initial, p_initial))
self.assertTrue(array_equal
(markov_model_load.p_transition, p_transition))
self.assertTrue(array_equal(markov_model_load.p_transition, p_transition))
self.assertTrue(array_equal(markov_model_load.p_emission, p_emission))
def test_train_bw(self):
    """Train with train_bw on four sequences and compare to stored parameters."""
    # train_bw is called without initial parameters, so seed the generator
    # first to make the comparison with the stored values reproducible.
    random.seed(0)
    states = ["0", "1", "2", "3"]
    alphabet = ["A", "C", "G", "T"]
    training_data = [
        "AACCCGGGTTTTTTT",
        "ACCGTTTTTTT",
        "ACGGGTTTTTT",
        "ACCGTTTTTTTT",
    ]

    output_p_initial = array([0.2275677, 0.29655611, 0.24993822, 0.22593797])
    output_p_transition = array(
        [
            [5.16919807e-001, 3.65825814e-033, 4.83080193e-001, 9.23220689e-042],
            [3.65130247e-001, 1.00000000e-300, 6.34869753e-001, 1.00000000e-300],
            [8.68776164e-001, 1.02254304e-034, 1.31223836e-001, 6.21835051e-047],
            [3.33333333e-301, 3.33333333e-001, 3.33333333e-301, 6.66666667e-001],
        ]
    )
    output_p_emission = array(
        [
            [2.02593570e-301, 2.02593570e-301, 2.02593570e-301, 1.00000000e+000],
            [1.00000000e-300, 1.00000000e-300, 1.00000000e+000, 1.09629016e-259],
            [3.26369779e-301, 3.26369779e-301, 3.26369779e-301, 1.00000000e+000],
            [3.33333333e-001, 6.66666667e-001, 3.33333333e-301, 3.33333333e-301],
        ]
    )

    markov_model = MarkovModel.train_bw(states, alphabet, training_data)
    self.assertEqual("".join(markov_model.states), "".join(states))
    self.assertEqual("".join(markov_model.alphabet), "".join(alphabet))
    # Parameters are compared after rounding both sides to 3 decimals.
    self.assertTrue(
        array_equal(
            around(markov_model.p_initial, decimals=3),
            around(output_p_initial, decimals=3),
        )
    )
    self.assertTrue(
        array_equal(
            around(markov_model.p_transition, decimals=3),
            around(output_p_transition, decimals=3),
        )
    )
    self.assertTrue(
        array_equal(
            around(markov_model.p_emission, decimals=3),
            around(output_p_emission, decimals=3),
        )
    )
def test_forward(self):
states = ["CP", "IP"]
@ -337,15 +344,28 @@ class TestMarkovModel(unittest.TestCase):
lp_transition = log([[0.7, 0.3], [0.5, 0.5]])
lp_emission = log([[0.6, 0.1, 0.3], [0.1, 0.7, 0.2]])
matrix = array([[0., -1.5606477, -3.07477539, -3.84932984],
[-16.11809565, -2.4079455, -3.27544608, -4.5847794]])
matrix = array(
[
[0.0, -1.5606477, -3.07477539, -3.84932984],
[-16.11809565, -2.4079455, -3.27544608, -4.5847794],
]
)
self.assertTrue(
array_equal(around(MarkovModel._forward(len(states), len(outputs),
lp_initial,
lp_transition,
lp_emission,
outputs), decimals=3),
around(matrix, decimals=3)))
array_equal(
around(
MarkovModel._forward(
len(states),
len(outputs),
lp_initial,
lp_transition,
lp_emission,
outputs,
),
decimals=3,
),
around(matrix, decimals=3),
)
)
def test_backward(self):
states = ["CP", "IP"]
@ -353,43 +373,91 @@ class TestMarkovModel(unittest.TestCase):
lp_transition = log([[0.7, 0.3], [0.5, 0.5]])
lp_emission = log([[0.6, 0.1, 0.3], [0.1, 0.7, 0.2]])
matrix = array([[-3.45776773, -3.10109279, -0.51082562, 0.],
[-3.54045945, -1.40649707, -2.30258509, 0.]])
matrix = array(
[
[-3.45776773, -3.10109279, -0.51082562, 0.0],
[-3.54045945, -1.40649707, -2.30258509, 0.0],
]
)
self.assertTrue(
array_equal(around(MarkovModel._backward(
len(states), len(outputs), lp_transition, lp_emission, outputs), decimals=3),
around(matrix, decimals=3)))
array_equal(
around(
MarkovModel._backward(
len(states), len(outputs), lp_transition, lp_emission, outputs
),
decimals=3,
),
around(matrix, decimals=3),
)
)
def test_mle(self):
    """Check _mle estimates from four labelled training sequences."""
    states = ["0", "1", "2", "3"]
    alphabet = ["A", "C", "G", "T"]
    # The index rows below encode these (sequence, state path) pairs, with
    # symbols indexed into ``alphabet`` and states into ``states``:
    #   ("AACCCGGGTTTTTTT", "001112223333333")
    #   ("ACCGTTTTTTT", "01123333333")
    #   ("ACGGGTTTTTT", "01222333333")
    #   ("ACCGTTTTTTTT", "011233333333")
    #
    # The rows have different lengths, so dtype=object is required: NumPy
    # no longer builds an array from ragged nested sequences implicitly
    # (deprecated in 1.19, ValueError from 1.24).
    training_outputs = array(
        [
            [0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3],
            [0, 1, 1, 2, 3, 3, 3, 3, 3, 3, 3],
            [0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3],
            [0, 1, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3],
        ],
        dtype=object,
    )
    training_states = array(
        [
            [0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3],
            [0, 1, 1, 2, 3, 3, 3, 3, 3, 3, 3],
            [0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3],
            [0, 1, 1, 2, 3, 3, 3, 3, 3, 3, 3, 3],
        ],
        dtype=object,
    )

    p_initial = array([1.0, 0.0, 0.0, 0.0])
    p_transition = array(
        [
            [0.2, 0.8, 0.0, 0.0],
            [0.0, 0.5, 0.5, 0.0],
            [0.0, 0.0, 0.5, 0.5],
            [0.0, 0.0, 0.0, 1.0],
        ]
    )
    p_emission = array(
        [
            [0.66666667, 0.11111111, 0.11111111, 0.11111111],
            [0.08333333, 0.75, 0.08333333, 0.08333333],
            [0.08333333, 0.08333333, 0.75, 0.08333333],
            [0.03125, 0.03125, 0.03125, 0.90625],
        ]
    )
    # The last three positional arguments are passed as None, as in the
    # original call.
    p_initial_out, p_transition_out, p_emission_out = MarkovModel._mle(
        len(states),
        len(alphabet),
        training_outputs,
        training_states,
        None,
        None,
        None,
    )
    # Estimates are compared after rounding both sides to 3 decimals.
    self.assertTrue(
        array_equal(
            around(p_initial_out, decimals=3), around(p_initial, decimals=3)
        )
    )
    self.assertTrue(
        array_equal(
            around(p_transition_out, decimals=3), around(p_transition, decimals=3)
        )
    )
    self.assertTrue(
        array_equal(
            around(p_emission_out, decimals=3), around(p_emission, decimals=3)
        )
    )
def test_argmaxes(self):
matrix = array([[4, 5, 6], [9, 7, 8], [1, 2, 3]])
@ -408,77 +476,110 @@ class TestMarkovModel(unittest.TestCase):
output2 = -3.968593356916541
viterbi_output = MarkovModel._viterbi(
len(states), lp_initial, lp_transition,
lp_emission, outputs)
len(states), lp_initial, lp_transition, lp_emission, outputs
)
self.assertEqual(len(viterbi_output[0][0]), 3)
self.assertEqual(viterbi_output[0][0][0], output1[0])
self.assertEqual(viterbi_output[0][0][1], output1[1])
self.assertEqual(viterbi_output[0][0][2], output1[2])
self.assertEqual(
float("%.3f" % viterbi_output[0][1]),
float("%.3f" % output2))
self.assertEqual(float("%.3f" % viterbi_output[0][1]), float("%.3f" % output2))
def test_normalize_and_copy_and_check(self):
    """Check _normalize and _copy_and_check on a 3x3 matrix and a 3-vector."""
    matrix_in1 = array([[1.1, 2.2, 3.3], [4.4, 5.5, 6.6], [7.7, 8.8, 9.9]])
    matrix_in2 = array([1, 2, 3])
    # Each expected row (and the expected vector) is the input divided by
    # its own sum, e.g. 1.1 / 6.6 = 0.1667.
    matrix_out1 = array(
        [
            [0.16666667, 0.33333333, 0.5],
            [0.26666667, 0.33333333, 0.4],
            [0.29166667, 0.33333333, 0.375],
        ]
    )
    matrix_out2 = array([0.16666667, 0.33333333, 0.5])
    self.assertTrue(
        array_equal(
            around(MarkovModel._normalize(matrix_in1), decimals=3),
            around(matrix_out1, decimals=3),
        )
    )
    self.assertTrue(
        array_equal(
            around(MarkovModel._normalize(matrix_in2), decimals=3),
            around(matrix_out2, decimals=3),
        )
    )

    # The already-normalized arrays are expected to come back unchanged
    # when the requested shape matches.
    shape1 = (3, 3)
    shape2 = (3,)
    self.assertTrue(
        array_equal(
            around(MarkovModel._copy_and_check(matrix_out1, shape1), decimals=3),
            around(matrix_out1, decimals=3),
        )
    )
    self.assertTrue(
        array_equal(
            around(MarkovModel._copy_and_check(matrix_out2, shape2), decimals=3),
            around(matrix_out2, decimals=3),
        )
    )
def test_uniform_norm(self):
    """Check _uniform_norm((4, 3)) against a 4x3 matrix filled with 1/3."""
    shape = (4, 3)
    matrix = array(
        [
            [0.33333333, 0.33333333, 0.33333333],
            [0.33333333, 0.33333333, 0.33333333],
            [0.33333333, 0.33333333, 0.33333333],
            [0.33333333, 0.33333333, 0.33333333],
        ]
    )
    # Compared after rounding both sides to 3 decimals.
    self.assertTrue(
        array_equal(
            around(MarkovModel._uniform_norm(shape), decimals=3),
            around(matrix, decimals=3),
        )
    )
def test_random_norm(self):
    """Check _random_norm((4, 3)) against stored values for seed 0."""
    # The expected matrix below was recorded for this seed.
    random.seed(0)
    shape = (4, 3)
    matrix = array(
        [
            [0.29399155, 0.38311672, 0.32289173],
            [0.33750765, 0.26241723, 0.40007512],
            [0.1908342, 0.38890714, 0.42025866],
            [0.22501625, 0.46461061, 0.31037314],
        ]
    )
    # Compared after rounding both sides to 3 decimals.
    self.assertTrue(
        array_equal(
            around(MarkovModel._random_norm(shape), decimals=3),
            around(matrix, decimals=3),
        )
    )
def test_logsum_and_exp_logsum(self):
    """Check _logsum and _exp_logsum on a 3x3 matrix and a 3-vector."""
    matrix = array([[1.1, 2.2, 3.3], [4.4, 5.5, 6.6], [7.7, 8.8, 9.9]])
    matrix1 = array([1, 2, 3])

    # Each result is formatted to 3 decimals before comparison, so the
    # check is on the rounded values rather than exact float equality.
    output = 10.304721798
    output1 = 3.40760596444
    self.assertEqual(
        float("%.3f" % MarkovModel._logsum(matrix)), float("%.3f" % output)
    )
    self.assertEqual(
        float("%.3f" % MarkovModel._logsum(matrix1)), float("%.3f" % output1)
    )

    output2 = 29873.342245
    output3 = 30.1928748506
    self.assertEqual(
        float("%.3f" % MarkovModel._exp_logsum(matrix)), float("%.3f" % output2)
    )
    self.assertEqual(
        float("%.3f" % MarkovModel._exp_logsum(matrix1)), float("%.3f" % output3)
    )
def test_logvecadd(self):
vec1 = log(array([1, 2, 3, 4]))
@ -486,7 +587,11 @@ class TestMarkovModel(unittest.TestCase):
sumvec = array([1.79175947, 2.07944154, 2.30258509, 2.48490665])
self.assertTrue(
array_equal(around(MarkovModel._logvecadd(vec1, vec2), decimals=3), around(sumvec, decimals=3)))
array_equal(
around(MarkovModel._logvecadd(vec1, vec2), decimals=3),
around(sumvec, decimals=3),
)
)
if __name__ == "__main__":

View File

@ -13,7 +13,6 @@ from Bio import Medline
class TestMedline(unittest.TestCase):
def test_read(self):
with open("Medline/pubmed_result1.txt") as handle:
record = Medline.read(handle)
@ -30,8 +29,13 @@ class TestMedline(unittest.TestCase):
self.assertEqual(record["DP"], "2002 Sep")
self.assertEqual(record["TI"], "The Bio* toolkits--a brief overview.")
self.assertEqual(record["PG"], "296-302")
self.assertEqual(record["AB"], "Bioinformatics research is often difficult to do with commercial software. The Open Source BioPerl, BioPython and Biojava projects provide toolkits with multiple functionality that make it easier to create customised pipelines or analysis. This review briefly compares the quirks of the underlying languages and the functionality, documentation, utility and relative advantages of the Bio counterparts, particularly from the point of view of the beginning biologist programmer.")
self.assertEqual(record["AD"], ["tacg Informatics, Irvine, CA 92612, USA. hjm@tacgi.com"])
self.assertEqual(
record["AB"],
"Bioinformatics research is often difficult to do with commercial software. The Open Source BioPerl, BioPython and Biojava projects provide toolkits with multiple functionality that make it easier to create customised pipelines or analysis. This review briefly compares the quirks of the underlying languages and the functionality, documentation, utility and relative advantages of the Bio counterparts, particularly from the point of view of the beginning biologist programmer.",
)
self.assertEqual(
record["AD"], ["tacg Informatics, Irvine, CA 92612, USA. hjm@tacgi.com"]
)
self.assertEqual(record["FAU"], ["Mangalam, Harry"])
self.assertEqual(record["AU"], ["Mangalam H"])
self.assertEqual(record["LA"], ["eng"])
@ -41,7 +45,18 @@ class TestMedline(unittest.TestCase):
self.assertEqual(record["JT"], "Briefings in bioinformatics")
self.assertEqual(record["JID"], "100912837")
self.assertEqual(record["SB"], "IM")
self.assertEqual(record["MH"], ["*Computational Biology", "Computer Systems", "Humans", "Internet", "*Programming Languages", "*Software", "User-Computer Interface"])
self.assertEqual(
record["MH"],
[
"*Computational Biology",
"Computer Systems",
"Humans",
"Internet",
"*Programming Languages",
"*Software",
"User-Computer Interface",
],
)
self.assertEqual(record["EDAT"], "2002/09/17 10:00")
self.assertEqual(record["MHDA"], "2003/06/07 05:00")
self.assertEqual(record["PST"], "ppublish")
@ -60,11 +75,25 @@ class TestMedline(unittest.TestCase):
self.assertEqual(record["IS"], "1471-2105 (Electronic)")
self.assertEqual(record["VI"], "7")
self.assertEqual(record["DP"], "2006")
self.assertEqual(record["TI"], "A high level interface to SCOP and ASTRAL implemented in python.")
self.assertEqual(
record["TI"],
"A high level interface to SCOP and ASTRAL implemented in python.",
)
self.assertEqual(record["PG"], "10")
self.assertEqual(record["AB"], "BACKGROUND: Benchmarking algorithms in structural bioinformatics often involves the construction of datasets of proteins with given sequence and structural properties. The SCOP database is a manually curated structural classification which groups together proteins on the basis of structural similarity. The ASTRAL compendium provides non redundant subsets of SCOP domains on the basis of sequence similarity such that no two domains in a given subset share more than a defined degree of sequence similarity. Taken together these two resources provide a 'ground truth' for assessing structural bioinformatics algorithms. We present a small and easy to use API written in python to enable construction of datasets from these resources. RESULTS: We have designed a set of python modules to provide an abstraction of the SCOP and ASTRAL databases. The modules are designed to work as part of the Biopython distribution. Python users can now manipulate and use the SCOP hierarchy from within python programs, and use ASTRAL to return sequences of domains in SCOP, as well as clustered representations of SCOP from ASTRAL. CONCLUSION: The modules make the analysis and generation of datasets for use in structural genomics easier and more principled.")
self.assertEqual(record["AD"], ["Bioinformatics, Institute of Cell and Molecular Science, School of Medicine and Dentistry, Queen Mary, University of London, London EC1 6BQ, UK. j.a.casbon@qmul.ac.uk"])
self.assertEqual(record["FAU"], ["Casbon, James A", "Crooks, Gavin E", "Saqi, Mansoor A S"])
self.assertEqual(
record["AB"],
"BACKGROUND: Benchmarking algorithms in structural bioinformatics often involves the construction of datasets of proteins with given sequence and structural properties. The SCOP database is a manually curated structural classification which groups together proteins on the basis of structural similarity. The ASTRAL compendium provides non redundant subsets of SCOP domains on the basis of sequence similarity such that no two domains in a given subset share more than a defined degree of sequence similarity. Taken together these two resources provide a 'ground truth' for assessing structural bioinformatics algorithms. We present a small and easy to use API written in python to enable construction of datasets from these resources. RESULTS: We have designed a set of python modules to provide an abstraction of the SCOP and ASTRAL databases. The modules are designed to work as part of the Biopython distribution. Python users can now manipulate and use the SCOP hierarchy from within python programs, and use ASTRAL to return sequences of domains in SCOP, as well as clustered representations of SCOP from ASTRAL. CONCLUSION: The modules make the analysis and generation of datasets for use in structural genomics easier and more principled.",
)
self.assertEqual(
record["AD"],
[
"Bioinformatics, Institute of Cell and Molecular Science, School of Medicine and Dentistry, Queen Mary, University of London, London EC1 6BQ, UK. j.a.casbon@qmul.ac.uk"
],
)
self.assertEqual(
record["FAU"],
["Casbon, James A", "Crooks, Gavin E", "Saqi, Mansoor A S"],
)
self.assertEqual(record["AU"], ["Casbon JA", "Crooks GE", "Saqi MA"])
self.assertEqual(record["LA"], ["eng"])
self.assertEqual(record["PT"], ["Evaluation Studies", "Journal Article"])
@ -74,12 +103,34 @@ class TestMedline(unittest.TestCase):
self.assertEqual(record["JT"], "BMC bioinformatics")
self.assertEqual(record["JID"], "100965194")
self.assertEqual(record["SB"], "IM")
self.assertEqual(record["MH"], ["*Database Management Systems", "*Databases, Protein", "Information Storage and Retrieval/*methods", "Programming Languages", "Sequence Alignment/*methods", "Sequence Analysis, Protein/*methods", "Sequence Homology, Amino Acid", "*Software", "*User-Computer Interface"])
self.assertEqual(
record["MH"],
[
"*Database Management Systems",
"*Databases, Protein",
"Information Storage and Retrieval/*methods",
"Programming Languages",
"Sequence Alignment/*methods",
"Sequence Analysis, Protein/*methods",
"Sequence Homology, Amino Acid",
"*Software",
"*User-Computer Interface",
],
)
self.assertEqual(record["PMC"], "PMC1373603")
self.assertEqual(record["EDAT"], "2006/01/13 09:00")
self.assertEqual(record["MHDA"], "2006/03/15 09:00")
self.assertEqual(record["PHST"], ["2005/06/17 [received]", "2006/01/10 [accepted]", "2006/01/10 [aheadofprint]"])
self.assertEqual(record["AID"], ["1471-2105-7-10 [pii]", "10.1186/1471-2105-7-10 [doi]"])
self.assertEqual(
record["PHST"],
[
"2005/06/17 [received]",
"2006/01/10 [accepted]",
"2006/01/10 [aheadofprint]",
],
)
self.assertEqual(
record["AID"], ["1471-2105-7-10 [pii]", "10.1186/1471-2105-7-10 [doi]"]
)
self.assertEqual(record["PST"], "epublish")
self.assertEqual(record["SO"], "BMC Bioinformatics. 2006 Jan 10;7:10.")
record = next(records)
@ -94,27 +145,67 @@ class TestMedline(unittest.TestCase):
self.assertEqual(record["VI"], "22")
self.assertEqual(record["IP"], "5")
self.assertEqual(record["DP"], "2006 Mar 1")
self.assertEqual(record["TI"], "GenomeDiagram: a python package for the visualization of large-scale genomic data.")
self.assertEqual(
record["TI"],
"GenomeDiagram: a python package for the visualization of large-scale genomic data.",
)
self.assertEqual(record["PG"], "616-7")
self.assertEqual(record["AB"], "SUMMARY: We present GenomeDiagram, a flexible, open-source Python module for the visualization of large-scale genomic, comparative genomic and other data with reference to a single chromosome or other biological sequence. GenomeDiagram may be used to generate publication-quality vector graphics, rastered images and in-line streamed graphics for webpages. The package integrates with datatypes from the BioPython project, and is available for Windows, Linux and Mac OS X systems. AVAILABILITY: GenomeDiagram is freely available as source code (under GNU Public License) at http://bioinf.scri.ac.uk/lp/programs.html, and requires Python 2.3 or higher, and recent versions of the ReportLab and BioPython packages. SUPPLEMENTARY INFORMATION: A user manual, example code and images are available at http://bioinf.scri.ac.uk/lp/programs.html.")
self.assertEqual(record["AD"], ["Plant Pathogen Programme, Scottish Crop Research Institute, Invergowrie, Dundee DD2 5DA, Scotland, UK. lpritc@scri.ac.uk"])
self.assertEqual(record["FAU"], ["Pritchard, Leighton", "White, Jennifer A", "Birch, Paul R J", "Toth, Ian K"])
self.assertEqual(record["AU"], ["Pritchard L", "White JA", "Birch PR", "Toth IK"])
self.assertEqual(
record["AB"],
"SUMMARY: We present GenomeDiagram, a flexible, open-source Python module for the visualization of large-scale genomic, comparative genomic and other data with reference to a single chromosome or other biological sequence. GenomeDiagram may be used to generate publication-quality vector graphics, rastered images and in-line streamed graphics for webpages. The package integrates with datatypes from the BioPython project, and is available for Windows, Linux and Mac OS X systems. AVAILABILITY: GenomeDiagram is freely available as source code (under GNU Public License) at http://bioinf.scri.ac.uk/lp/programs.html, and requires Python 2.3 or higher, and recent versions of the ReportLab and BioPython packages. SUPPLEMENTARY INFORMATION: A user manual, example code and images are available at http://bioinf.scri.ac.uk/lp/programs.html.",
)
self.assertEqual(
record["AD"],
[
"Plant Pathogen Programme, Scottish Crop Research Institute, Invergowrie, Dundee DD2 5DA, Scotland, UK. lpritc@scri.ac.uk"
],
)
self.assertEqual(
record["FAU"],
[
"Pritchard, Leighton",
"White, Jennifer A",
"Birch, Paul R J",
"Toth, Ian K",
],
)
self.assertEqual(
record["AU"], ["Pritchard L", "White JA", "Birch PR", "Toth IK"]
)
self.assertEqual(record["LA"], ["eng"])
self.assertEqual(record["PT"], ["Journal Article", "Research Support, Non-U.S. Gov't"])
self.assertEqual(
record["PT"], ["Journal Article", "Research Support, Non-U.S. Gov't"]
)
self.assertEqual(record["DEP"], "20051223")
self.assertEqual(record["PL"], "England")
self.assertEqual(record["TA"], "Bioinformatics")
self.assertEqual(record["JT"], "Bioinformatics (Oxford, England)")
self.assertEqual(record["JID"], "9808944")
self.assertEqual(record["SB"], "IM")
self.assertEqual(record["MH"], ["Chromosome Mapping/*methods", "*Computer Graphics", "*Database Management Systems", "*Databases, Genetic", "Information Storage and Retrieval/methods", "*Programming Languages", "*Software", "*User-Computer Interface"])
self.assertEqual(
record["MH"],
[
"Chromosome Mapping/*methods",
"*Computer Graphics",
"*Database Management Systems",
"*Databases, Genetic",
"Information Storage and Retrieval/methods",
"*Programming Languages",
"*Software",
"*User-Computer Interface",
],
)
self.assertEqual(record["EDAT"], "2005/12/27 09:00")
self.assertEqual(record["MHDA"], "2006/04/19 09:00")
self.assertEqual(record["PHST"], ["2005/12/23 [aheadofprint]"])
self.assertEqual(record["AID"], ["btk021 [pii]", "10.1093/bioinformatics/btk021 [doi]"])
self.assertEqual(
record["AID"], ["btk021 [pii]", "10.1093/bioinformatics/btk021 [doi]"]
)
self.assertEqual(record["PST"], "ppublish")
self.assertEqual(record["SO"], "Bioinformatics. 2006 Mar 1;22(5):616-7. Epub 2005 Dec 23.")
self.assertEqual(
record["SO"],
"Bioinformatics. 2006 Mar 1;22(5):616-7. Epub 2005 Dec 23.",
)
record = next(records)
self.assertEqual(record["PMID"], "14871861")
self.assertEqual(record["OWN"], "NLM")
@ -129,25 +220,62 @@ class TestMedline(unittest.TestCase):
self.assertEqual(record["DP"], "2004 Jun 12")
self.assertEqual(record["TI"], "Open source clustering software.")
self.assertEqual(record["PG"], "1453-4")
self.assertEqual(record["AB"], "SUMMARY: We have implemented k-means clustering, hierarchical clustering and self-organizing maps in a single multipurpose open-source library of C routines, callable from other C and C++ programs. Using this library, we have created an improved version of Michael Eisen's well-known Cluster program for Windows, Mac OS X and Linux/Unix. In addition, we generated a Python and a Perl interface to the C Clustering Library, thereby combining the flexibility of a scripting language with the speed of C. AVAILABILITY: The C Clustering Library and the corresponding Python C extension module Pycluster were released under the Python License, while the Perl module Algorithm::Cluster was released under the Artistic License. The GUI code Cluster 3.0 for Windows, Macintosh and Linux/Unix, as well as the corresponding command-line program, were released under the same license as the original Cluster code. The complete source code is available at http://bonsai.ims.u-tokyo.ac.jp/mdehoon/software/cluster. Alternatively, Algorithm::Cluster can be downloaded from CPAN, while Pycluster is also available as part of the Biopython distribution.")
self.assertEqual(record["AD"], ["Human Genome Center, Institute of Medical Science, University of Tokyo, 4-6-1 Shirokanedai, Minato-ku, Tokyo, 108-8639 Japan. mdehoon@ims.u-tokyo.ac.jp"])
self.assertEqual(record["FAU"], ["de Hoon, M J L", "Imoto, S", "Nolan, J", "Miyano, S"])
self.assertEqual(record["AU"], ["de Hoon MJ", "Imoto S", "Nolan J", "Miyano S"])
self.assertEqual(
record["AB"],
"SUMMARY: We have implemented k-means clustering, hierarchical clustering and self-organizing maps in a single multipurpose open-source library of C routines, callable from other C and C++ programs. Using this library, we have created an improved version of Michael Eisen's well-known Cluster program for Windows, Mac OS X and Linux/Unix. In addition, we generated a Python and a Perl interface to the C Clustering Library, thereby combining the flexibility of a scripting language with the speed of C. AVAILABILITY: The C Clustering Library and the corresponding Python C extension module Pycluster were released under the Python License, while the Perl module Algorithm::Cluster was released under the Artistic License. The GUI code Cluster 3.0 for Windows, Macintosh and Linux/Unix, as well as the corresponding command-line program, were released under the same license as the original Cluster code. The complete source code is available at http://bonsai.ims.u-tokyo.ac.jp/mdehoon/software/cluster. Alternatively, Algorithm::Cluster can be downloaded from CPAN, while Pycluster is also available as part of the Biopython distribution.",
)
self.assertEqual(
record["AD"],
[
"Human Genome Center, Institute of Medical Science, University of Tokyo, 4-6-1 Shirokanedai, Minato-ku, Tokyo, 108-8639 Japan. mdehoon@ims.u-tokyo.ac.jp"
],
)
self.assertEqual(
record["FAU"], ["de Hoon, M J L", "Imoto, S", "Nolan, J", "Miyano, S"]
)
self.assertEqual(
record["AU"], ["de Hoon MJ", "Imoto S", "Nolan J", "Miyano S"]
)
self.assertEqual(record["LA"], ["eng"])
self.assertEqual(record["PT"], ["Comparative Study", "Evaluation Studies", "Journal Article", "Validation Studies"])
self.assertEqual(
record["PT"],
[
"Comparative Study",
"Evaluation Studies",
"Journal Article",
"Validation Studies",
],
)
self.assertEqual(record["DEP"], "20040210")
self.assertEqual(record["PL"], "England")
self.assertEqual(record["TA"], "Bioinformatics")
self.assertEqual(record["JT"], "Bioinformatics (Oxford, England)")
self.assertEqual(record["JID"], "9808944")
self.assertEqual(record["SB"], "IM")
self.assertEqual(record["MH"], ["*Algorithms", "*Cluster Analysis", "Gene Expression Profiling/*methods", "Pattern Recognition, Automated/methods", "*Programming Languages", "Sequence Alignment/*methods", "Sequence Analysis, DNA/*methods", "*Software"])
self.assertEqual(
record["MH"],
[
"*Algorithms",
"*Cluster Analysis",
"Gene Expression Profiling/*methods",
"Pattern Recognition, Automated/methods",
"*Programming Languages",
"Sequence Alignment/*methods",
"Sequence Analysis, DNA/*methods",
"*Software",
],
)
self.assertEqual(record["EDAT"], "2004/02/12 05:00")
self.assertEqual(record["MHDA"], "2005/01/05 09:00")
self.assertEqual(record["PHST"], ["2004/02/10 [aheadofprint]"])
self.assertEqual(record["AID"], ["10.1093/bioinformatics/bth078 [doi]", "bth078 [pii]"])
self.assertEqual(
record["AID"], ["10.1093/bioinformatics/bth078 [doi]", "bth078 [pii]"]
)
self.assertEqual(record["PST"], "ppublish")
self.assertEqual(record["SO"], "Bioinformatics. 2004 Jun 12;20(9):1453-4. Epub 2004 Feb 10.")
self.assertEqual(
record["SO"],
"Bioinformatics. 2004 Jun 12;20(9):1453-4. Epub 2004 Feb 10.",
)
record = next(records)
self.assertEqual(record["PMID"], "14630660")
self.assertEqual(record["OWN"], "NLM")
@ -160,39 +288,79 @@ class TestMedline(unittest.TestCase):
self.assertEqual(record["VI"], "19")
self.assertEqual(record["IP"], "17")
self.assertEqual(record["DP"], "2003 Nov 22")
self.assertEqual(record["TI"], "PDB file parser and structure class implemented in Python.")
self.assertEqual(
record["TI"],
"PDB file parser and structure class implemented in Python.",
)
self.assertEqual(record["PG"], "2308-10")
self.assertEqual(record["AB"], "The biopython project provides a set of bioinformatics tools implemented in Python. Recently, biopython was extended with a set of modules that deal with macromolecular structure. Biopython now contains a parser for PDB files that makes the atomic information available in an easy-to-use but powerful data structure. The parser and data structure deal with features that are often left out or handled inadequately by other packages, e.g. atom and residue disorder (if point mutants are present in the crystal), anisotropic B factors, multiple models and insertion codes. In addition, the parser performs some sanity checking to detect obvious errors. AVAILABILITY: The Biopython distribution (including source code and documentation) is freely available (under the Biopython license) from http://www.biopython.org")
self.assertEqual(record["AD"], ["Department of Cellular and Molecular Interactions, Vlaams Interuniversitair Instituut voor Biotechnologie and Computational Modeling Lab, Department of Computer Science, Vrije Universiteit Brussel, Pleinlaan 2, 1050 Brussels, Belgium. thamelry@vub.ac.be"])
self.assertEqual(
record["AB"],
"The biopython project provides a set of bioinformatics tools implemented in Python. Recently, biopython was extended with a set of modules that deal with macromolecular structure. Biopython now contains a parser for PDB files that makes the atomic information available in an easy-to-use but powerful data structure. The parser and data structure deal with features that are often left out or handled inadequately by other packages, e.g. atom and residue disorder (if point mutants are present in the crystal), anisotropic B factors, multiple models and insertion codes. In addition, the parser performs some sanity checking to detect obvious errors. AVAILABILITY: The Biopython distribution (including source code and documentation) is freely available (under the Biopython license) from http://www.biopython.org",
)
self.assertEqual(
record["AD"],
[
"Department of Cellular and Molecular Interactions, Vlaams Interuniversitair Instituut voor Biotechnologie and Computational Modeling Lab, Department of Computer Science, Vrije Universiteit Brussel, Pleinlaan 2, 1050 Brussels, Belgium. thamelry@vub.ac.be"
],
)
self.assertEqual(record["FAU"], ["Hamelryck, Thomas", "Manderick, Bernard"])
self.assertEqual(record["AU"], ["Hamelryck T", "Manderick B"])
self.assertEqual(record["LA"], ["eng"])
self.assertEqual(record["PT"], ["Comparative Study", "Evaluation Studies", "Journal Article", "Research Support, Non-U.S. Gov't", "Validation Studies"])
self.assertEqual(
record["PT"],
[
"Comparative Study",
"Evaluation Studies",
"Journal Article",
"Research Support, Non-U.S. Gov't",
"Validation Studies",
],
)
self.assertEqual(record["PL"], "England")
self.assertEqual(record["TA"], "Bioinformatics")
self.assertEqual(record["JT"], "Bioinformatics (Oxford, England)")
self.assertEqual(record["JID"], "9808944")
self.assertEqual(record["RN"], ["0 (Macromolecular Substances)"])
self.assertEqual(record["SB"], "IM")
self.assertEqual(record["MH"], ["Computer Simulation", "Database Management Systems/*standards", "*Databases, Protein", "Information Storage and Retrieval/*methods/*standards", "Macromolecular Substances", "*Models, Molecular", "*Programming Languages", "Protein Conformation", "*Software"])
self.assertEqual(
record["MH"],
[
"Computer Simulation",
"Database Management Systems/*standards",
"*Databases, Protein",
"Information Storage and Retrieval/*methods/*standards",
"Macromolecular Substances",
"*Models, Molecular",
"*Programming Languages",
"Protein Conformation",
"*Software",
],
)
self.assertEqual(record["EDAT"], "2003/11/25 05:00")
self.assertEqual(record["MHDA"], "2004/07/23 05:00")
self.assertEqual(record["PST"], "ppublish")
self.assertEqual(record["SO"], "Bioinformatics. 2003 Nov 22;19(17):2308-10.")
self.assertEqual(
record["SO"], "Bioinformatics. 2003 Nov 22;19(17):2308-10."
)
self.assertRaises(StopIteration, next, records)
def test_multiline_mesh(self):
    """Read Medline/pubmed_result3.txt and check its PMID and MeSH headings."""
    with open("Medline/pubmed_result3.txt") as handle:
        record = Medline.read(handle)
    self.assertEqual(record["PMID"], "23039619")
    # NOTE(review): going by the test name, the long second heading is
    # presumably wrapped over several lines in the input file - confirm
    # against the fixture. It has to come back as one list item.
    expected_mesh = [
        "Blood Circulation",
        "High-Intensity Focused Ultrasound Ablation/adverse effects/instrumentation/*methods",
        "Humans",
        "Models, Biological",
        "Sonication",
        "Temperature",
        "Time Factors",
        "Transducers",
    ]
    # Compared as a list, so both the items and their order are checked.
    self.assertEqual(record["MH"], expected_mesh)
if __name__ == "__main__":

View File

@ -32,13 +32,15 @@ if sys.platform == "win32":
# a Muscle directory with the muscle.exe file plus a readme etc,
# which the user could put anywhere. We'll try a few sensible
# locations under Program Files... and then the full path.
likely_dirs = ["", # Current dir
prog_files,
os.path.join(prog_files, "Muscle3.6"),
os.path.join(prog_files, "Muscle3.7"),
os.path.join(prog_files, "Muscle3.8"),
os.path.join(prog_files, "Muscle3.9"),
os.path.join(prog_files, "Muscle")] + sys.path
likely_dirs = [
"", # Current dir
prog_files,
os.path.join(prog_files, "Muscle3.6"),
os.path.join(prog_files, "Muscle3.7"),
os.path.join(prog_files, "Muscle3.8"),
os.path.join(prog_files, "Muscle3.9"),
os.path.join(prog_files, "Muscle"),
] + sys.path
for folder in likely_dirs:
if os.path.isdir(folder):
if os.path.isfile(os.path.join(folder, "muscle.exe")):
@ -48,6 +50,7 @@ if sys.platform == "win32":
break
else:
from subprocess import getoutput
output = getoutput("muscle -version")
# Since "not found" may be in another language, try and be sure this is
# really the MUSCLE tool's output
@ -57,13 +60,13 @@ else:
if not muscle_exe:
raise MissingExternalDependencyError(
"Install MUSCLE if you want to use the Bio.Align.Applications wrapper.")
"Install MUSCLE if you want to use the Bio.Align.Applications wrapper."
)
#################################################################
class MuscleApplication(unittest.TestCase):
def setUp(self):
self.infile1 = "Fasta/f002"
self.infile2 = "Fasta/fa01"
@ -85,11 +88,12 @@ class MuscleApplication(unittest.TestCase):
def test_Muscle_simple(self):
"""Simple round-trip through app just infile and outfile."""
cmdline = MuscleCommandline(muscle_exe,
input=self.infile1,
out=self.outfile1)
self.assertEqual(str(cmdline), _escape_filename(muscle_exe) +
' -in Fasta/f002 -out "Fasta/temp align out1.fa"')
cmdline = MuscleCommandline(muscle_exe, input=self.infile1, out=self.outfile1)
self.assertEqual(
str(cmdline),
_escape_filename(muscle_exe)
+ ' -in Fasta/f002 -out "Fasta/temp align out1.fa"',
)
self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
output, error = cmdline()
self.assertEqual(output, "")
@ -103,10 +107,13 @@ class MuscleApplication(unittest.TestCase):
# Use property:
cmdline.objscore = "sp"
cmdline.noanchors = True
self.assertEqual(str(cmdline), _escape_filename(muscle_exe) +
" -in Fasta/f002" +
" -out Fasta/temp_align_out2.fa" +
" -objscore sp -noanchors")
self.assertEqual(
str(cmdline),
_escape_filename(muscle_exe)
+ " -in Fasta/f002"
+ " -out Fasta/temp_align_out2.fa"
+ " -objscore sp -noanchors",
)
self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
output, error = cmdline()
self.assertEqual(output, "")
@ -120,9 +127,12 @@ class MuscleApplication(unittest.TestCase):
cmdline.set_parameter("profile", True)
cmdline.set_parameter("in1", self.infile2)
cmdline.set_parameter("in2", self.infile3)
self.assertEqual(str(cmdline), _escape_filename(muscle_exe) +
" -out Fasta/temp_align_out3.fa" +
" -profile -in1 Fasta/fa01 -in2 Fasta/f001")
self.assertEqual(
str(cmdline),
_escape_filename(muscle_exe)
+ " -out Fasta/temp_align_out3.fa"
+ " -profile -in1 Fasta/fa01 -in2 Fasta/f001",
)
self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
output, error = cmdline()
self.assertEqual(output, "")
@ -132,14 +142,22 @@ class MuscleApplication(unittest.TestCase):
def test_Muscle_profile_with_options(self):
"""Profile alignment, and switch and valued options."""
# Using some keyword arguments, note -stable isn't supported in v3.8
cmdline = MuscleCommandline(muscle_exe, out=self.outfile4,
in1=self.infile2, in2=self.infile3,
profile=True, stable=True,
cluster1="neighborjoining")
self.assertEqual(str(cmdline), _escape_filename(muscle_exe) +
" -out Fasta/temp_align_out4.fa" +
" -profile -in1 Fasta/fa01 -in2 Fasta/f001" +
" -cluster1 neighborjoining -stable")
cmdline = MuscleCommandline(
muscle_exe,
out=self.outfile4,
in1=self.infile2,
in2=self.infile3,
profile=True,
stable=True,
cluster1="neighborjoining",
)
self.assertEqual(
str(cmdline),
_escape_filename(muscle_exe)
+ " -out Fasta/temp_align_out4.fa"
+ " -profile -in1 Fasta/fa01 -in2 Fasta/f001"
+ " -cluster1 neighborjoining -stable",
)
self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
"""
#TODO - Why doesn't this work with MUSCLE 3.6 on the Mac?
@ -188,14 +206,17 @@ class SimpleAlignTest(unittest.TestCase):
records = list(SeqIO.parse(input_file, "fasta"))
records.sort(key=lambda rec: rec.id) # noqa: E731
cmdline = MuscleCommandline(muscle_exe, input=input_file, msf=True)
self.assertEqual(str(cmdline).rstrip(), _escape_filename(muscle_exe) +
" -in Fasta/f002 -msf")
self.assertEqual(
str(cmdline).rstrip(), _escape_filename(muscle_exe) + " -in Fasta/f002 -msf"
)
self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
child = subprocess.Popen(str(cmdline),
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=True,
shell=(sys.platform != "win32"))
child = subprocess.Popen(
str(cmdline),
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=True,
shell=(sys.platform != "win32"),
)
# Didn't use -quiet so there should be progress reports on stderr,
align = AlignIO.read(child.stdout, "msf")
align.sort() # by record.id
@ -218,14 +239,17 @@ class SimpleAlignTest(unittest.TestCase):
records.sort(key=lambda rec: rec.id) # noqa: E731
# Prepare the command... use Clustal output (with a MUSCLE header)
cmdline = MuscleCommandline(muscle_exe, input=input_file, clw=True)
self.assertEqual(str(cmdline).rstrip(), _escape_filename(muscle_exe) +
" -in Fasta/f002 -clw")
self.assertEqual(
str(cmdline).rstrip(), _escape_filename(muscle_exe) + " -in Fasta/f002 -clw"
)
self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
child = subprocess.Popen(str(cmdline),
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=True,
shell=(sys.platform != "win32"))
child = subprocess.Popen(
str(cmdline),
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=True,
shell=(sys.platform != "win32"),
)
# Didn't use -quiet so there should be progress reports on stderr,
align = AlignIO.read(child.stdout, "clustal")
align.sort() # by record.id
@ -251,14 +275,18 @@ class SimpleAlignTest(unittest.TestCase):
cmdline.set_parameter("in", input_file)
# Use clustal output (with a CLUSTAL header)
cmdline.set_parameter("clwstrict", True) # Default None treated as False!
self.assertEqual(str(cmdline).rstrip(), _escape_filename(muscle_exe) +
" -in Fasta/f002 -clwstrict")
self.assertEqual(
str(cmdline).rstrip(),
_escape_filename(muscle_exe) + " -in Fasta/f002 -clwstrict",
)
self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
child = subprocess.Popen(str(cmdline),
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=True,
shell=(sys.platform != "win32"))
child = subprocess.Popen(
str(cmdline),
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=True,
shell=(sys.platform != "win32"),
)
# Didn't use -quiet so there should be progress reports on stderr,
align = AlignIO.read(child.stdout, "clustal")
align.sort()
@ -291,15 +319,20 @@ class SimpleAlignTest(unittest.TestCase):
cmdline.set_parameter("maxhours", 0.1)
# No progress reports to stderr
cmdline.set_parameter("quiet", True) # Default None treated as False!
self.assertEqual(str(cmdline).rstrip(), _escape_filename(muscle_exe) +
" -in temp_cw_prot.fasta -diags -maxhours 0.1" +
" -maxiters 1 -clwstrict -quiet")
self.assertEqual(
str(cmdline).rstrip(),
_escape_filename(muscle_exe)
+ " -in temp_cw_prot.fasta -diags -maxhours 0.1"
+ " -maxiters 1 -clwstrict -quiet",
)
self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
child = subprocess.Popen(str(cmdline),
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=True,
shell=(sys.platform != "win32"))
child = subprocess.Popen(
str(cmdline),
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=True,
shell=(sys.platform != "win32"),
)
align = AlignIO.read(child.stdout, "clustal")
align.sort()
records.sort(key=lambda rec: rec.id) # noqa: E731
@ -323,15 +356,16 @@ class SimpleAlignTest(unittest.TestCase):
records = list(SeqIO.parse(input_file, "fasta"))
# Prepare the command... use Clustal output (with a MUSCLE header)
cline = MuscleCommandline(muscle_exe, clw=True)
self.assertEqual(str(cline).rstrip(),
_escape_filename(muscle_exe) + " -clw")
self.assertEqual(str(cline).rstrip(), _escape_filename(muscle_exe) + " -clw")
self.assertEqual(str(eval(repr(cline))), str(cline))
child = subprocess.Popen(str(cline),
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=True,
shell=(sys.platform != "win32"))
child = subprocess.Popen(
str(cline),
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=True,
shell=(sys.platform != "win32"),
)
SeqIO.write(records, child.stdin, "fasta")
child.stdin.close()
# Alignment will now run...
@ -356,18 +390,27 @@ class SimpleAlignTest(unittest.TestCase):
records = list(SeqIO.parse(input_file, "fasta"))
records.sort(key=lambda rec: rec.id) # noqa: E731
# Prepare the command... use Clustal output (with a MUSCLE header)
cmdline = MuscleCommandline(muscle_exe, input=input_file,
clw=True, htmlout=output_html,
clwstrictout=output_clwstrict)
self.assertEqual(str(cmdline).rstrip(), _escape_filename(muscle_exe) +
" -in Fasta/f002 -clw -htmlout temp_f002.html" +
" -clwstrictout temp_f002.clw")
cmdline = MuscleCommandline(
muscle_exe,
input=input_file,
clw=True,
htmlout=output_html,
clwstrictout=output_clwstrict,
)
self.assertEqual(
str(cmdline).rstrip(),
_escape_filename(muscle_exe)
+ " -in Fasta/f002 -clw -htmlout temp_f002.html"
+ " -clwstrictout temp_f002.clw",
)
self.assertEqual(str(eval(repr(cmdline))), str(cmdline))
child = subprocess.Popen(str(cmdline),
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=True,
shell=(sys.platform != "win32"))
child = subprocess.Popen(
str(cmdline),
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
universal_newlines=True,
shell=(sys.platform != "win32"),
)
# Clustalw on stdout:
align = AlignIO.read(child.stdout, "clustal")
align.sort()

View File

@ -28,17 +28,24 @@ class ParseMMTF(unittest.TestCase):
for i, e in enumerate(self.mmcif_atoms):
mmtf_atom = self.mmtf_atoms[i]
mmcif_atom = self.mmcif_atoms[i]
self.assertEqual(mmtf_atom.name, mmcif_atom.name) # eg. CA, spaces are removed from atom name
self.assertEqual(mmtf_atom.fullname, mmcif_atom.fullname) # e.g. " CA ", spaces included
self.assertEqual(
mmtf_atom.name, mmcif_atom.name
) # eg. CA, spaces are removed from atom name
self.assertEqual(
mmtf_atom.fullname, mmcif_atom.fullname
) # e.g. " CA ", spaces included
self.assertAlmostEqual(mmtf_atom.coord[0], mmcif_atom.coord[0], places=3)
self.assertAlmostEqual(mmtf_atom.coord[1], mmcif_atom.coord[1], places=3)
self.assertAlmostEqual(mmtf_atom.coord[2], mmcif_atom.coord[2], places=3)
self.assertEqual(mmtf_atom.bfactor, mmcif_atom.bfactor)
self.assertEqual(mmtf_atom.occupancy, mmcif_atom.occupancy)
self.assertEqual(mmtf_atom.altloc, mmcif_atom.altloc)
self.assertEqual(mmtf_atom.full_id,
mmcif_atom.full_id) # (structure id, model id, chain id, residue id, atom id)
self.assertEqual(mmtf_atom.id, mmcif_atom.name) # id of atom is the atom name (e.g. "CA")
self.assertEqual(
mmtf_atom.full_id, mmcif_atom.full_id
) # (structure id, model id, chain id, residue id, atom id)
self.assertEqual(
mmtf_atom.id, mmcif_atom.name
) # id of atom is the atom name (e.g. "CA")
# self.assertEqual(mmtf_atom.serial_number,mmcif_atom.serial_number) # mmCIF serial number is none
self.assertEqual(mmtf_atom - mmtf_atom, 0)
self.assertEqual(mmtf_atom - mmcif_atom, 0)
@ -78,12 +85,16 @@ class ParseMMTF(unittest.TestCase):
self.mmcif_res = list(mmcif_struct.get_residues())
self.mmtf_res = list(mmtf_struct.get_residues())
self.check_residues()
self.assertEqual(sum(1 for _ in mmcif_struct.get_models()), sum(1 for _ in mmtf_struct.get_models()))
self.assertEqual(
sum(1 for _ in mmcif_struct.get_models()),
sum(1 for _ in mmtf_struct.get_models()),
)
def test_4CUP(self):
    """Run the MMTF-versus-mmCIF comparison on the 4CUP structure files."""
    mmtf_path = "PDB/4CUP.mmtf"
    mmcif_path = "PDB/4CUP.cif"
    # The comparison itself is done by the shared helper on this test case.
    self.check_mmtf_vs_cif(mmtf_path, mmcif_path)
# TODO:
# def test_1A8O(self):
# """Compare parsing 1A8O.mmtf and 1A8O.cif"""
@ -140,7 +151,9 @@ class WriteMMTF(unittest.TestCase):
self.assertEqual(set(dict_back.ins_code_list), {"\x00"})
self.assertEqual(set(dict_back.alt_loc_list), {"\x00"})
self.assertEqual(list(dict_back.atom_id_list), list(range(1, 645)))
self.assertEqual(list(dict_back.sequence_index_list), list(range(70)) + [-1] * 88)
self.assertEqual(
list(dict_back.sequence_index_list), list(range(70)) + [-1] * 88
)
self.assertEqual(dict_back.chain_id_list, ["A", "B"])
self.assertEqual(dict_back.chain_name_list, ["A", "A"])
self.assertEqual(dict_back.chains_per_model, [2])
@ -151,7 +164,10 @@ class WriteMMTF(unittest.TestCase):
self.assertEqual(len(dict_back.entity_list), 2)
self.assertEqual(dict_back.entity_list[0]["type"], "polymer")
self.assertEqual(dict_back.entity_list[0]["chainIndexList"], [0])
self.assertEqual(dict_back.entity_list[0]["sequence"], "MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQG")
self.assertEqual(
dict_back.entity_list[0]["sequence"],
"MDIRQGPKEPFRDYVDRFYKTLRAEQASQEVKNWMTETLLVQNANPDCKTILKALGPGATLEEMMTACQG",
)
self.assertEqual(dict_back.entity_list[1]["type"], "water")
self.assertEqual(dict_back.entity_list[1]["chainIndexList"], [1])
self.assertEqual(dict_back.entity_list[1]["sequence"], "")
@ -174,7 +190,9 @@ class WriteMMTF(unittest.TestCase):
self.assertEqual(dict_back.num_chains, 4)
self.assertEqual(dict_back.num_groups, 4)
self.assertEqual(dict_back.num_atoms, 4)
self.assertEqual(list(dict_back.x_coord_list), [-1.058, -0.025, 7.024, 6.259])
self.assertEqual(
list(dict_back.x_coord_list), [-1.058, -0.025, 7.024, 6.259]
)
self.assertEqual(dict_back.chain_id_list, ["A", "B", "A", "B"])
self.assertEqual(dict_back.chain_name_list, ["A", "B", "A", "B"])
self.assertEqual(dict_back.chains_per_model, [2, 2])

View File

@ -13,6 +13,7 @@ from Bio.PDB.PDBExceptions import PDBConstructionWarning
from Bio.PDB.mmtf import MMTFParser
import requires_internet
requires_internet.check()

View File

@ -107,7 +107,10 @@ class MotifTestsBasic(unittest.TestCase):
self.assertEqual(str(record[1].instances[19]), "GGCGGGCCATCCCTGTATGAA")
self.assertEqual(str(record[1].instances[20]), "CTCCAGGTCGCATGGAGAGAG")
self.assertEqual(str(record[1].instances[21]), "CCTCGGATCGCTTGGGAAGAG")
self.assertEqual(record[1].mask, (1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1))
self.assertEqual(
record[1].mask,
(1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1),
)
self.assertAlmostEqual(record[1].score, 19.6235)
self.assertEqual(record[2].alphabet, "ACGT")
@ -130,7 +133,10 @@ class MotifTestsBasic(unittest.TestCase):
self.assertEqual(str(record[2].instances[15]), "GACCTGGAGGCTTAGACTTGG")
self.assertEqual(str(record[2].instances[16]), "GCGCTCTTCCCAAGCGATCCG")
self.assertEqual(str(record[2].instances[17]), "GGGCCGTCAGCTCTCAAGTCT")
self.assertEqual(record[2].mask, (1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1))
self.assertEqual(
record[2].mask,
(1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1),
)
self.assertAlmostEqual(record[2].score, 19.1804)
self.assertEqual(record[3].alphabet, "ACGT")
@ -151,7 +157,10 @@ class MotifTestsBasic(unittest.TestCase):
self.assertEqual(str(record[3].instances[13]), "GCGATCAGCTTGTGGGCGTGC")
self.assertEqual(str(record[3].instances[14]), "GACAAATCGGATACTGGGGCA")
self.assertEqual(str(record[3].instances[15]), "GCACTTAGCAGCGTATCGTTA")
self.assertEqual(record[3].mask, (1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1))
self.assertEqual(
record[3].mask,
(1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1),
)
self.assertAlmostEqual(record[3].score, 18.0097)
self.assertEqual(record[4].alphabet, "ACGT")
self.assertEqual(len(record[4].instances), 15)
@ -192,7 +201,10 @@ class MotifTestsBasic(unittest.TestCase):
self.assertEqual(str(record[5].instances[15]), "CTCTGCGTCGCATGGCGGCGTGG")
self.assertEqual(str(record[5].instances[16]), "GGAGGCTTAGACTTGGGCGATAC")
self.assertEqual(str(record[5].instances[17]), "GCATGGAGAGAGATCCGGAGGAG")
self.assertEqual(record[5].mask, (1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1))
self.assertEqual(
record[5].mask,
(1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1),
)
self.assertAlmostEqual(record[5].score, 15.0441)
self.assertEqual(record[6].alphabet, "ACGT")
self.assertEqual(len(record[6].instances), 20)
@ -240,7 +252,10 @@ class MotifTestsBasic(unittest.TestCase):
self.assertEqual(str(record[7].instances[17]), "AGTCAATGACACGCGCCTGGG")
self.assertEqual(str(record[7].instances[18]), "GGTCATGGAATCTTATGTAGC")
self.assertEqual(str(record[7].instances[19]), "GTAGATAACAGAGGTCGGGGG")
self.assertEqual(record[7].mask, (1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1))
self.assertEqual(
record[7].mask,
(1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1),
)
self.assertAlmostEqual(record[7].score, 11.6098)
self.assertEqual(record[8].alphabet, "ACGT")
self.assertEqual(len(record[8].instances), 14)
@ -297,7 +312,10 @@ class MotifTestsBasic(unittest.TestCase):
self.assertEqual(str(record[10].instances[10]), "ATCCTCTGCGTCGCATGGCGG")
self.assertEqual(str(record[10].instances[11]), "GACCATAGACGAGCATCAAAG")
self.assertEqual(str(record[10].instances[12]), "GGCCCTCGGATCGCTTGGGAA")
self.assertEqual(record[10].mask, (1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1))
self.assertEqual(
record[10].mask,
(1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1),
)
self.assertAlmostEqual(record[10].score, 9.01393)
self.assertEqual(record[11].alphabet, "ACGT")
self.assertEqual(len(record[11].instances), 16)
@ -337,7 +355,10 @@ class MotifTestsBasic(unittest.TestCase):
self.assertEqual(str(record[12].instances[13]), "GCACGTAGCTGGTAAATAGG")
self.assertEqual(str(record[12].instances[14]), "GCGGCGTGGATTTCATACAG")
self.assertEqual(str(record[12].instances[15]), "CCTGGAGGCTTAGACTTGGG")
self.assertEqual(record[12].mask, (1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1))
self.assertEqual(
record[12].mask,
(1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1),
)
self.assertAlmostEqual(record[12].score, 5.63667)
self.assertEqual(record[13].alphabet, "ACGT")
self.assertEqual(len(record[13].instances), 15)
@ -356,7 +377,10 @@ class MotifTestsBasic(unittest.TestCase):
self.assertEqual(str(record[13].instances[12]), "ACGCACGGGACTTCAACCAG")
self.assertEqual(str(record[13].instances[13]), "GCACGTAGCTGGTAAATAGG")
self.assertEqual(str(record[13].instances[14]), "ATCCTCTGCGTCGCATGGCG")
self.assertEqual(record[13].mask, (1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1))
self.assertEqual(
record[13].mask,
(1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1),
)
self.assertAlmostEqual(record[13].score, 3.89842)
self.assertEqual(record[14].alphabet, "ACGT")
self.assertEqual(len(record[14].instances), 14)
@ -399,7 +423,9 @@ class MotifTestsBasic(unittest.TestCase):
self.assertEqual(str(record[15].instances[18]), "AGGCTCGCACGTAGCTGG")
self.assertEqual(str(record[15].instances[19]), "CCACGCCGCCATGCGACG")
self.assertEqual(str(record[15].instances[20]), "AGCCTCCAGGTCGCATGG")
self.assertEqual(record[15].mask, (1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1))
self.assertEqual(
record[15].mask, (1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1)
)
self.assertAlmostEqual(record[15].score, 1.0395)
def test_clusterbuster_parsing_and_output(self):
@ -414,7 +440,10 @@ class MotifTestsBasic(unittest.TestCase):
self.assertEqual(record[2].degenerate_consensus, "CAATTATT")
self.CLUSTERBUSTERin.seek(0)
self.assertEqual(motifs.write(record, "clusterbuster").split(), self.CLUSTERBUSTERin.read().split())
self.assertEqual(
motifs.write(record, "clusterbuster").split(),
self.CLUSTERBUSTERin.read().split(),
)
def test_xms_parsing(self):
"""Test if Bio.motifs can parse and output xms PFM files."""
@ -530,7 +559,6 @@ T: 0.50 0.17 0.50 0.17 0.50
class TestMEME(unittest.TestCase):
def test_meme_parser_1(self):
"""Parse motifs/meme.INO_up800.classic.oops.xml file."""
with open("motifs/meme.INO_up800.classic.oops.xml") as handle:
@ -546,7 +574,10 @@ class TestMEME(unittest.TestCase):
self.assertEqual(record.sequences[4], "sequence_4")
self.assertEqual(record.sequences[5], "sequence_5")
self.assertEqual(record.sequences[6], "sequence_6")
self.assertEqual(record.command, "meme common/INO_up800.s -oc results/meme10 -mod oops -dna -revcomp -bfile common/yeast.nc.6.freq -nmotifs 2 -objfun classic -minw 8 -nostatus ")
self.assertEqual(
record.command,
"meme common/INO_up800.s -oc results/meme10 -mod oops -dna -revcomp -bfile common/yeast.nc.6.freq -nmotifs 2 -objfun classic -minw 8 -nostatus ",
)
self.assertEqual(len(record), 2)
motif = record[0]
self.assertEqual(motif.name, "GSKGCATGTGAAA")
@ -695,7 +726,10 @@ class TestMEME(unittest.TestCase):
self.assertEqual(record.sequences[30], "sequence_30")
self.assertEqual(record.sequences[31], "sequence_31")
self.assertEqual(record.sequences[32], "sequence_32")
self.assertEqual(record.command, "meme common/adh.s -oc results/meme4 -mod oops -protein -nmotifs 2 -objfun classic -minw 8 -nostatus ")
self.assertEqual(
record.command,
"meme common/adh.s -oc results/meme4 -mod oops -protein -nmotifs 2 -objfun classic -minw 8 -nostatus ",
)
self.assertEqual(len(record), 2)
motif = record[0]
self.assertEqual(motif.id, "motif_1")
@ -1124,7 +1158,10 @@ class TestMEME(unittest.TestCase):
self.assertEqual(record.sequences[2], "sequence_2")
self.assertEqual(record.sequences[3], "sequence_3")
self.assertEqual(record.sequences[4], "sequence_4")
self.assertEqual(record.command, "meme common/farntrans5.s -oc results/meme15 -mod anr -protein -nmotifs 2 -objfun classic -minw 8 -nostatus ")
self.assertEqual(
record.command,
"meme common/farntrans5.s -oc results/meme15 -mod anr -protein -nmotifs 2 -objfun classic -minw 8 -nostatus ",
)
self.assertEqual(len(record), 2)
motif = record[0]
self.assertEqual(motif.name, "GGFGGRPGKEVDLCYTYCALAALAJLGSLD")
@ -1755,109 +1792,257 @@ class TestMAST(unittest.TestCase):
self.assertEqual(record.sequences[110], "chr5:105994747-105995247")
self.assertEqual(record.sequences[111], "chr17:84209565-84210065")
self.assertEqual(record.sequences[112], "chr7:16507689-16508189")
self.assertEqual(record.diagrams["chr3:104843905-104844405"], "115-[-1]-209-[-2]-126")
self.assertEqual(record.diagrams["chr12:114390660-114391160"], "3-[+2]-[+2]-3-[+1]-173-[+1]-3-[-2]-188")
self.assertEqual(record.diagrams["chr12:27135944-27136444"], "275-[-1]-89-[+2]-4-[+2]-52")
self.assertEqual(record.diagrams["chr10:59256089-59256589"], "247-[+2]-17-[-1]-186")
self.assertEqual(record.diagrams["chr4:135733850-135734350"], "183-[-1]-263-[+2]-4")
self.assertEqual(record.diagrams["chr1:137838164-137838664"], "192-[-2]-1-[+1]-44-[-1]-193")
self.assertEqual(record.diagrams["chr17:47735006-47735506"], "203-[+2]-15-[+1]-97-[-1]-115")
self.assertEqual(record.diagrams["chr6:72223026-72223526"], "52-[-2]-7-[+2]-162-[-1]-42-[-1]-137")
self.assertEqual(record.diagrams["chr13:3866266-3866766"], "241-[+1]-2-[-1]-217")
self.assertEqual(record.diagrams["chr1:133343883-133344383"], "190-[+2]-15-[+1]-245")
self.assertEqual(record.diagrams["chr11:117187372-117187872"], "242-[+1]-46-[-2]-71-[+1]-71")
self.assertEqual(record.diagrams["chr13:76003199-76003699"], "230-[+2]-15-[+2]-60-[-1]-115")
self.assertEqual(record.diagrams["chr5:65202593-65203093"], "24-[-2]-36-[+2]-193-[-1]-11-[+1]-10-[+1]-106")
self.assertEqual(record.diagrams["chr14:79702844-79703344"], "247-[-1]-46-[-2]-157")
self.assertEqual(record.diagrams["chr12:112796794-112797294"], "232-[+1]-41-[+1]-187")
self.assertEqual(record.diagrams["chr13:112863645-112864145"], "228-[+1]-20-[-1]-212")
self.assertEqual(record.diagrams["chr7:111007530-111008030"], "217-[+1]-83-[+2]-150")
self.assertEqual(record.diagrams["chr1:43307690-43308190"], "164-[-2]-52-[-2]-224")
self.assertEqual(record.diagrams["chr14:47973722-47974222"], "21-[+1]-181-[+1]-20-[-2]-208")
self.assertEqual(record.diagrams["chr9:120025371-120025871"], "110-[-2]-58-[+1]-282")
self.assertEqual(record.diagrams["chr7:105490727-105491227"], "100-[-2]-111-[-1]-239")
self.assertEqual(record.diagrams["chr5:37127175-37127675"], "234-[-2]-24-[+1]-192")
self.assertEqual(
record.diagrams["chr3:104843905-104844405"], "115-[-1]-209-[-2]-126"
)
self.assertEqual(
record.diagrams["chr12:114390660-114391160"],
"3-[+2]-[+2]-3-[+1]-173-[+1]-3-[-2]-188",
)
self.assertEqual(
record.diagrams["chr12:27135944-27136444"], "275-[-1]-89-[+2]-4-[+2]-52"
)
self.assertEqual(
record.diagrams["chr10:59256089-59256589"], "247-[+2]-17-[-1]-186"
)
self.assertEqual(
record.diagrams["chr4:135733850-135734350"], "183-[-1]-263-[+2]-4"
)
self.assertEqual(
record.diagrams["chr1:137838164-137838664"], "192-[-2]-1-[+1]-44-[-1]-193"
)
self.assertEqual(
record.diagrams["chr17:47735006-47735506"], "203-[+2]-15-[+1]-97-[-1]-115"
)
self.assertEqual(
record.diagrams["chr6:72223026-72223526"],
"52-[-2]-7-[+2]-162-[-1]-42-[-1]-137",
)
self.assertEqual(
record.diagrams["chr13:3866266-3866766"], "241-[+1]-2-[-1]-217"
)
self.assertEqual(
record.diagrams["chr1:133343883-133344383"], "190-[+2]-15-[+1]-245"
)
self.assertEqual(
record.diagrams["chr11:117187372-117187872"], "242-[+1]-46-[-2]-71-[+1]-71"
)
self.assertEqual(
record.diagrams["chr13:76003199-76003699"], "230-[+2]-15-[+2]-60-[-1]-115"
)
self.assertEqual(
record.diagrams["chr5:65202593-65203093"],
"24-[-2]-36-[+2]-193-[-1]-11-[+1]-10-[+1]-106",
)
self.assertEqual(
record.diagrams["chr14:79702844-79703344"], "247-[-1]-46-[-2]-157"
)
self.assertEqual(
record.diagrams["chr12:112796794-112797294"], "232-[+1]-41-[+1]-187"
)
self.assertEqual(
record.diagrams["chr13:112863645-112864145"], "228-[+1]-20-[-1]-212"
)
self.assertEqual(
record.diagrams["chr7:111007530-111008030"], "217-[+1]-83-[+2]-150"
)
self.assertEqual(
record.diagrams["chr1:43307690-43308190"], "164-[-2]-52-[-2]-224"
)
self.assertEqual(
record.diagrams["chr14:47973722-47974222"], "21-[+1]-181-[+1]-20-[-2]-208"
)
self.assertEqual(
record.diagrams["chr9:120025371-120025871"], "110-[-2]-58-[+1]-282"
)
self.assertEqual(
record.diagrams["chr7:105490727-105491227"], "100-[-2]-111-[-1]-239"
)
self.assertEqual(
record.diagrams["chr5:37127175-37127675"], "234-[-2]-24-[+1]-192"
)
self.assertEqual(record.diagrams["chr5:45951565-45952065"], "261-[-1]-219")
self.assertEqual(record.diagrams["chr7:91033422-91033922"], "465-[-1]-15")
self.assertEqual(record.diagrams["chr4:154285745-154286245"], "235-[+1]-20-[-2]-195")
self.assertEqual(record.diagrams["chr13:100518008-100518508"], "226-[-2]-18-[-1]-206")
self.assertEqual(record.diagrams["chr1:36977019-36977519"], "88-[+1]-187-[+2]-60-[-1]-95")
self.assertEqual(record.diagrams["chr7:151917814-151918314"], "219-[+1]-80-[+2]-151")
self.assertEqual(record.diagrams["chr7:110976195-110976695"], "287-[+2]-12-[+1]-151")
self.assertEqual(
record.diagrams["chr4:154285745-154286245"], "235-[+1]-20-[-2]-195"
)
self.assertEqual(
record.diagrams["chr13:100518008-100518508"], "226-[-2]-18-[-1]-206"
)
self.assertEqual(
record.diagrams["chr1:36977019-36977519"], "88-[+1]-187-[+2]-60-[-1]-95"
)
self.assertEqual(
record.diagrams["chr7:151917814-151918314"], "219-[+1]-80-[+2]-151"
)
self.assertEqual(
record.diagrams["chr7:110976195-110976695"], "287-[+2]-12-[+1]-151"
)
self.assertEqual(record.diagrams["chr15:58719281-58719781"], "212-[-2]-258")
self.assertEqual(record.diagrams["chr11:57590460-57590960"], "56-[-1]-271-[-1]-75-[+2]-28")
self.assertEqual(record.diagrams["chr8:83025150-83025650"], "219-[+1]-87-[+2]-144")
self.assertEqual(record.diagrams["chr13:54345922-54346422"], "283-[-2]-161-[+1]-6")
self.assertEqual(record.diagrams["chr12:82044358-82044858"], "50-[+2]-160-[+1]-39-[+2]-171")
self.assertEqual(record.diagrams["chr11:105013714-105014214"], "115-[-2]-160-[+1]-26-[-1]-129")
self.assertEqual(record.diagrams["chr10:93585404-93585904"], "141-[+2]-48-[+1]-261")
self.assertEqual(
record.diagrams["chr11:57590460-57590960"], "56-[-1]-271-[-1]-75-[+2]-28"
)
self.assertEqual(
record.diagrams["chr8:83025150-83025650"], "219-[+1]-87-[+2]-144"
)
self.assertEqual(
record.diagrams["chr13:54345922-54346422"], "283-[-2]-161-[+1]-6"
)
self.assertEqual(
record.diagrams["chr12:82044358-82044858"], "50-[+2]-160-[+1]-39-[+2]-171"
)
self.assertEqual(
record.diagrams["chr11:105013714-105014214"],
"115-[-2]-160-[+1]-26-[-1]-129",
)
self.assertEqual(
record.diagrams["chr10:93585404-93585904"], "141-[+2]-48-[+1]-261"
)
self.assertEqual(record.diagrams["chr7:19832207-19832707"], "229-[-1]-251")
self.assertEqual(record.diagrams["chr8:97323995-97324495"], "177-[-1]-40-[-2]-139-[+1]-74")
self.assertEqual(record.diagrams["chr10:126642277-126642777"], "252-[-1]-92-[-2]-106")
self.assertEqual(record.diagrams["chr1:156887119-156887619"], "189-[-2]-78-[-1]-183")
self.assertEqual(record.diagrams["chr15:81700367-81700867"], "109-[-1]-99-[-1]-252")
self.assertEqual(record.diagrams["chr6:121187425-121187925"], "29-[+2]-313-[-1]-108")
self.assertEqual(record.diagrams["chr4:43977111-43977611"], "60-[+1]-148-[+1]-252")
self.assertEqual(record.diagrams["chr11:102236405-102236905"], "10-[+2]-145-[-1]-3-[-1]-6-[+2]-60-[+1]-156")
self.assertEqual(
record.diagrams["chr8:97323995-97324495"], "177-[-1]-40-[-2]-139-[+1]-74"
)
self.assertEqual(
record.diagrams["chr10:126642277-126642777"], "252-[-1]-92-[-2]-106"
)
self.assertEqual(
record.diagrams["chr1:156887119-156887619"], "189-[-2]-78-[-1]-183"
)
self.assertEqual(
record.diagrams["chr15:81700367-81700867"], "109-[-1]-99-[-1]-252"
)
self.assertEqual(
record.diagrams["chr6:121187425-121187925"], "29-[+2]-313-[-1]-108"
)
self.assertEqual(
record.diagrams["chr4:43977111-43977611"], "60-[+1]-148-[+1]-252"
)
self.assertEqual(
record.diagrams["chr11:102236405-102236905"],
"10-[+2]-145-[-1]-3-[-1]-6-[+2]-60-[+1]-156",
)
self.assertEqual(record.diagrams["chr17:5112057-5112557"], "249-[+1]-231")
self.assertEqual(record.diagrams["chr10:110604369-110604869"], "232-[+1]-248")
self.assertEqual(record.diagrams["chr1:169314208-169314708"], "192-[-1]-[-1]-11-[-2]-227")
self.assertEqual(record.diagrams["chr9:57618594-57619094"], "125-[+2]-151-[-1]-4-[-1]-150")
self.assertEqual(record.diagrams["chr10:128184604-128185104"], "30-[-2]-128-[+1]-292")
self.assertEqual(record.diagrams["chr4:109112541-109113041"], "21-[-1]-13-[+1]-94-[+2]-302")
self.assertEqual(record.diagrams["chr3:97461668-97462168"], "18-[+2]-256-[-1]-81-[+1]-21-[+1]-34")
self.assertEqual(
record.diagrams["chr1:169314208-169314708"], "192-[-1]-[-1]-11-[-2]-227"
)
self.assertEqual(
record.diagrams["chr9:57618594-57619094"], "125-[+2]-151-[-1]-4-[-1]-150"
)
self.assertEqual(
record.diagrams["chr10:128184604-128185104"], "30-[-2]-128-[+1]-292"
)
self.assertEqual(
record.diagrams["chr4:109112541-109113041"], "21-[-1]-13-[+1]-94-[+2]-302"
)
self.assertEqual(
record.diagrams["chr3:97461668-97462168"],
"18-[+2]-256-[-1]-81-[+1]-21-[+1]-34",
)
self.assertEqual(record.diagrams["chr9:102674395-102674895"], "372-[+2]-98")
self.assertEqual(record.diagrams["chr17:24289205-24289705"], "262-[-1]-218")
self.assertEqual(record.diagrams["chr17:28960252-28960752"], "221-[+1]-81-[+1]-158")
self.assertEqual(
record.diagrams["chr17:28960252-28960752"], "221-[+1]-81-[+1]-158"
)
self.assertEqual(record.diagrams["chr2:73323093-73323593"], "49-[-2]-421")
self.assertEqual(record.diagrams["chr11:32150818-32151318"], "151-[-1]-27-[-1]-118-[-2]-134")
self.assertEqual(record.diagrams["chr7:103853792-103854292"], "212-[-2]-42-[+1]-196")
self.assertEqual(record.diagrams["chr16:49839621-49840121"], "192-[+2]-47-[-1]-17-[+2]-164")
self.assertEqual(
record.diagrams["chr11:32150818-32151318"], "151-[-1]-27-[-1]-118-[-2]-134"
)
self.assertEqual(
record.diagrams["chr7:103853792-103854292"], "212-[-2]-42-[+1]-196"
)
self.assertEqual(
record.diagrams["chr16:49839621-49840121"], "192-[+2]-47-[-1]-17-[+2]-164"
)
self.assertEqual(record.diagrams["chr6:135115628-135116128"], "231-[-1]-249")
self.assertEqual(record.diagrams["chr3:88305500-88306000"], "229-[+1]-251")
self.assertEqual(record.diagrams["chr18:57137388-57137888"], "296-[+2]-174")
self.assertEqual(record.diagrams["chr5:97380648-97381148"], "188-[-2]-282")
self.assertEqual(record.diagrams["chr15:91082416-91082916"], "239-[-1]-104-[-1]-73-[+2]-14")
self.assertEqual(record.diagrams["chr14:61272713-61273213"], "216-[+2]-104-[+1]-130")
self.assertEqual(
record.diagrams["chr15:91082416-91082916"], "239-[-1]-104-[-1]-73-[+2]-14"
)
self.assertEqual(
record.diagrams["chr14:61272713-61273213"], "216-[+2]-104-[+1]-130"
)
self.assertEqual(record.diagrams["chr5:33616214-33616714"], "247-[-1]-233")
self.assertEqual(record.diagrams["chr18:23982470-23982970"], "285-[-1]-195")
self.assertEqual(record.diagrams["chr9:24715045-24715545"], "214-[-1]-153-[+1]-93")
self.assertEqual(
record.diagrams["chr9:24715045-24715545"], "214-[-1]-153-[+1]-93"
)
self.assertEqual(record.diagrams["chr10:116195445-116195945"], "400-[+2]-70")
self.assertEqual(record.diagrams["chr11:77795184-77795684"], "247-[+1]-42-[-2]-67-[-2]-64")
self.assertEqual(record.diagrams["chr16:32508975-32509475"], "213-[+2]-29-[-1]-208")
self.assertEqual(
record.diagrams["chr11:77795184-77795684"], "247-[+1]-42-[-2]-67-[-2]-64"
)
self.assertEqual(
record.diagrams["chr16:32508975-32509475"], "213-[+2]-29-[-1]-208"
)
self.assertEqual(record.diagrams["chr18:80416880-80417380"], "239-[-1]-241")
self.assertEqual(record.diagrams["chr10:57252236-57252736"], "155-[+1]-158-[+2]-137")
self.assertEqual(record.diagrams["chr5:34915767-34916267"], "179-[+2]-29-[-1]-242")
self.assertEqual(
record.diagrams["chr10:57252236-57252736"], "155-[+1]-158-[+2]-137"
)
self.assertEqual(
record.diagrams["chr5:34915767-34916267"], "179-[+2]-29-[-1]-242"
)
self.assertEqual(record.diagrams["chr9:98389943-98390443"], "252-[-1]-228")
self.assertEqual(record.diagrams["chr19:5845899-5846399"], "136-[+1]-193-[+1]-131")
self.assertEqual(record.diagrams["chr3:151777796-151778296"], "30-[-2]-58-[-1]-362")
self.assertEqual(
record.diagrams["chr19:5845899-5846399"], "136-[+1]-193-[+1]-131"
)
self.assertEqual(
record.diagrams["chr3:151777796-151778296"], "30-[-2]-58-[-1]-362"
)
self.assertEqual(record.diagrams["chr4:76585120-76585620"], "329-[+2]-141")
self.assertEqual(record.diagrams["chr7:104332488-104332988"], "164-[+2]-23-[-1]-222-[+1]-21")
self.assertEqual(
record.diagrams["chr7:104332488-104332988"], "164-[+2]-23-[-1]-222-[+1]-21"
)
self.assertEqual(record.diagrams["chr5:138127197-138127697"], "238-[+1]-242")
self.assertEqual(record.diagrams["chr11:60988820-60989320"], "115-[+1]-68-[+1]-47-[+1]-210")
self.assertEqual(record.diagrams["chr8:19984030-19984530"], "103-[-1]-81-[+2]-266")
self.assertEqual(record.diagrams["chr11:31712262-31712762"], "118-[+2]-53-[+2]-269")
self.assertEqual(record.diagrams["chr15:41338514-41339014"], "173-[+2]-75-[+2]-192")
self.assertEqual(record.diagrams["chr9:21362671-21363171"], "105-[+1]-131-[+1]-224")
self.assertEqual(
record.diagrams["chr11:60988820-60989320"], "115-[+1]-68-[+1]-47-[+1]-210"
)
self.assertEqual(
record.diagrams["chr8:19984030-19984530"], "103-[-1]-81-[+2]-266"
)
self.assertEqual(
record.diagrams["chr11:31712262-31712762"], "118-[+2]-53-[+2]-269"
)
self.assertEqual(
record.diagrams["chr15:41338514-41339014"], "173-[+2]-75-[+2]-192"
)
self.assertEqual(
record.diagrams["chr9:21362671-21363171"], "105-[+1]-131-[+1]-224"
)
self.assertEqual(record.diagrams["chr18:58822702-58823202"], "467-[-2]-3")
self.assertEqual(record.diagrams["chr1:173447614-173448114"], "369-[-1]-111")
self.assertEqual(record.diagrams["chr6:81915769-81916269"], "197-[+1]-283")
self.assertEqual(record.diagrams["chr1:169322898-169323398"], "253-[-1]-227")
self.assertEqual(record.diagrams["chr12:70860461-70860961"], "197-[+2]-22-[-1]-231")
self.assertEqual(record.diagrams["chr9:59598186-59598686"], "163-[-2]-10-[-1]-277")
self.assertEqual(
record.diagrams["chr12:70860461-70860961"], "197-[+2]-22-[-1]-231"
)
self.assertEqual(
record.diagrams["chr9:59598186-59598686"], "163-[-2]-10-[-1]-277"
)
self.assertEqual(record.diagrams["chr3:19550495-19550995"], "452-[-2]-18")
self.assertEqual(record.diagrams["chr7:36132953-36133453"], "157-[-1]-323")
self.assertEqual(record.diagrams["chr7:38970375-38970875"], "49-[+1]-114-[+1]-297")
self.assertEqual(
record.diagrams["chr7:38970375-38970875"], "49-[+1]-114-[+1]-297"
)
self.assertEqual(record.diagrams["chr15:78243390-78243890"], "234-[+1]-246")
self.assertEqual(record.diagrams["chr7:87847381-87847881"], "99-[+2]-2-[-1]-230-[-1]-99")
self.assertEqual(
record.diagrams["chr7:87847381-87847881"], "99-[+2]-2-[-1]-230-[-1]-99"
)
self.assertEqual(record.diagrams["chr1:33631214-33631714"], "358-[-1]-122")
self.assertEqual(record.diagrams["chr4:135407873-135408373"], "116-[-1]-64-[+2]-270")
self.assertEqual(
record.diagrams["chr4:135407873-135408373"], "116-[-1]-64-[+2]-270"
)
self.assertEqual(record.diagrams["chr7:101244829-101245329"], "311-[-2]-159")
self.assertEqual(record.diagrams["chr10:60612190-60612690"], "215-[+1]-265")
self.assertEqual(record.diagrams["chr19:56465963-56466463"], "306-[+1]-36-[+1]-18-[+1]-80")
self.assertEqual(
record.diagrams["chr19:56465963-56466463"], "306-[+1]-36-[+1]-18-[+1]-80"
)
self.assertEqual(record.diagrams["chr4:41334759-41335259"], "204-[+1]-276")
self.assertEqual(record.diagrams["chr8:92969521-92970021"], "453-[+2]-17")
self.assertEqual(record.diagrams["chr6:145703215-145703715"], "154-[-2]-58-[+2]-228")
self.assertEqual(
record.diagrams["chr6:145703215-145703715"], "154-[-2]-58-[+2]-228"
)
self.assertEqual(record.diagrams["chr13:57679178-57679678"], "217-[-1]-263")
self.assertEqual(record.diagrams["chr19:45121628-45122128"], "35-[-2]-435")
self.assertEqual(record.diagrams["chr15:79757891-79758391"], "310-[+1]-170")
@ -1865,7 +2050,9 @@ class TestMAST(unittest.TestCase):
self.assertEqual(record.diagrams["chr13:81067500-81068000"], "252-[+1]-228")
self.assertEqual(record.diagrams["chr11:69714224-69714724"], "145-[+2]-325")
self.assertEqual(record.diagrams["chr2:103728071-103728571"], "369-[+1]-111")
self.assertEqual(record.diagrams["chr5:105994747-105995247"], "93-[+2]-153-[-2]-194")
self.assertEqual(
record.diagrams["chr5:105994747-105995247"], "93-[+2]-153-[-2]-194"
)
self.assertEqual(record.diagrams["chr17:84209565-84210065"], "64-[-2]-406")
self.assertEqual(record.diagrams["chr7:16507689-16508189"], "231-[+2]-239")
@ -2087,13 +2274,15 @@ class MotifTestPWM(unittest.TestCase):
def test_mixed_alphabets(self):
"""Test creating motif with mixed alphabets."""
# TODO - Can we support this?
seqs = [Seq("TACAA", IUPAC.unambiguous_dna),
Seq("TACGC", IUPAC.ambiguous_dna),
Seq("TACAC", IUPAC.extended_dna),
Seq("TACCC", Gapped(IUPAC.unambiguous_dna)),
Seq("AACCC", IUPAC.unambiguous_dna),
Seq("AATGC", IUPAC.unambiguous_dna),
Seq("AATGC", generic_dna)]
seqs = [
Seq("TACAA", IUPAC.unambiguous_dna),
Seq("TACGC", IUPAC.ambiguous_dna),
Seq("TACAC", IUPAC.extended_dna),
Seq("TACCC", Gapped(IUPAC.unambiguous_dna)),
Seq("AACCC", IUPAC.unambiguous_dna),
Seq("AATGC", IUPAC.unambiguous_dna),
Seq("AATGC", generic_dna),
]
# ValueError: Alphabets are inconsistent
self.assertRaises(ValueError, motifs.create, seqs)

View File

@ -16,6 +16,7 @@ from Bio import motifs
from Bio.Seq import Seq
import requires_internet
requires_internet.check()
@ -35,18 +36,22 @@ class TestotifWeblogo(unittest.TestCase):
def test_dna(self):
"""Test Bio.motifs.weblogo with a DNA sequence."""
self.check(["TACAA", "TACGC", "TACAC", "TACCC",
"AACCC", "AATGC", "AATGC"], "GATCBDSW")
self.check(
["TACAA", "TACGC", "TACAC", "TACCC", "AACCC", "AATGC", "AATGC"], "GATCBDSW"
)
def test_rna(self):
"""Test Bio.motifs.weblogo with an RNA sequence."""
self.check(["UACAA", "UACGC", "UACAC", "UACCC",
"AACCC", "AAUGC", "AAUGC"], "GAUC")
self.check(
["UACAA", "UACGC", "UACAC", "UACCC", "AACCC", "AAUGC", "AAUGC"], "GAUC"
)
def test_protein(self):
"""Test Bio.motifs.weblogo with a protein sequence."""
self.check(["ACDEG", "AYCRN", "HYLID", "AYHEL",
"ACDEH", "AYYRN", "HYIID"], "ACDEFGHIKLMNPQRSTVWYBXZJUO")
self.check(
["ACDEG", "AYCRN", "HYLID", "AYHEL", "ACDEH", "AYYRN", "HYIID"],
"ACDEFGHIKLMNPQRSTVWYBXZJUO",
)
if __name__ == "__main__":