mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 21:53:47 +08:00
Add support for reading and writing GAP and WEIGHT parameters in Cluster Buster motif files.
This commit is contained in:
committed by
Peter Cock
parent
f02217c40e
commit
bc0d58b4c4
@ -23,12 +23,14 @@ class Record(list):
|
|||||||
def read(handle):
|
def read(handle):
|
||||||
"""Read motifs in Cluster Buster position frequency matrix format from a file handle.
|
"""Read motifs in Cluster Buster position frequency matrix format from a file handle.
|
||||||
|
|
||||||
Cluster Buster motif format: http://zlab.bu.edu/cluster-buster/help/cis-format.html
|
Cluster Buster motif format: https://bu.wenglab.org/cluster-buster/help/cis-format.html
|
||||||
"""
|
"""
|
||||||
motif_nbr = 0
|
motif_nbr = 0
|
||||||
record = Record()
|
record = Record()
|
||||||
nucleotide_counts = {"A": [], "C": [], "G": [], "T": []}
|
nucleotide_counts = {"A": [], "C": [], "G": [], "T": []}
|
||||||
motif_name = ""
|
motif_name = ""
|
||||||
|
motif_gap = None
|
||||||
|
motif_weight = None
|
||||||
|
|
||||||
for line in handle:
|
for line in handle:
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
@ -37,13 +39,23 @@ def read(handle):
|
|||||||
if motif_nbr != 0:
|
if motif_nbr != 0:
|
||||||
motif = motifs.Motif(alphabet="GATC", counts=nucleotide_counts)
|
motif = motifs.Motif(alphabet="GATC", counts=nucleotide_counts)
|
||||||
motif.name = motif_name
|
motif.name = motif_name
|
||||||
|
motif.gap = motif_gap
|
||||||
|
motif.weight = motif_weight
|
||||||
record.append(motif)
|
record.append(motif)
|
||||||
|
|
||||||
motif_name = line[1:].strip()
|
motif_name = line[1:].strip()
|
||||||
nucleotide_counts = {"A": [], "C": [], "G": [], "T": []}
|
nucleotide_counts = {"A": [], "C": [], "G": [], "T": []}
|
||||||
|
motif_gap = None
|
||||||
|
motif_weight = None
|
||||||
motif_nbr += 1
|
motif_nbr += 1
|
||||||
else:
|
else:
|
||||||
if line.startswith("#"):
|
if line.startswith("# GAP"):
|
||||||
|
motif_gap = float(line.split()[2])
|
||||||
|
continue
|
||||||
|
elif line.startswith("# WEIGHT"):
|
||||||
|
motif_weight = float(line.split()[2])
|
||||||
|
continue
|
||||||
|
elif line.startswith("#"):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
matrix_columns = line.split()
|
matrix_columns = line.split()
|
||||||
@ -58,6 +70,8 @@ def read(handle):
|
|||||||
|
|
||||||
motif = motifs.Motif(alphabet="GATC", counts=nucleotide_counts)
|
motif = motifs.Motif(alphabet="GATC", counts=nucleotide_counts)
|
||||||
motif.name = motif_name
|
motif.name = motif_name
|
||||||
|
motif.gap = motif_gap
|
||||||
|
motif.weight = motif_weight
|
||||||
record.append(motif)
|
record.append(motif)
|
||||||
|
|
||||||
return record
|
return record
|
||||||
@ -67,8 +81,11 @@ def write(motifs):
|
|||||||
"""Return the representation of motifs in Cluster Buster position frequency matrix format."""
|
"""Return the representation of motifs in Cluster Buster position frequency matrix format."""
|
||||||
lines = []
|
lines = []
|
||||||
for m in motifs:
|
for m in motifs:
|
||||||
line = f">{m.name}\n"
|
lines.append(f">{m.name}\n")
|
||||||
lines.append(line)
|
if m.weight:
|
||||||
|
lines.append(f"# WEIGHT: {m.weight}\n")
|
||||||
|
if m.gap:
|
||||||
|
lines.append(f"# GAP: {m.gap}\n")
|
||||||
for ACGT_counts in zip(
|
for ACGT_counts in zip(
|
||||||
m.counts["A"], m.counts["C"], m.counts["G"], m.counts["T"]
|
m.counts["A"], m.counts["C"], m.counts["G"], m.counts["T"]
|
||||||
):
|
):
|
||||||
|
@ -13,6 +13,8 @@
|
|||||||
0 0 0 24
|
0 0 0 24
|
||||||
0 0 24 0
|
0 0 24 0
|
||||||
>MA0008.1
|
>MA0008.1
|
||||||
|
# WEIGHT: 3.0
|
||||||
|
# GAP: 10.0
|
||||||
3 13 4 5
|
3 13 4 5
|
||||||
21 1 0 3
|
21 1 0 3
|
||||||
25 0 0 0
|
25 0 0 0
|
||||||
|
@ -1659,6 +1659,8 @@ class TestClusterBuster(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
self.assertEqual(motif[1:-2].consensus, "ACG")
|
self.assertEqual(motif[1:-2].consensus, "ACG")
|
||||||
self.assertEqual(motif.length, 6)
|
self.assertEqual(motif.length, 6)
|
||||||
|
self.assertIsNone(motif.weight)
|
||||||
|
self.assertIsNone(motif.gap)
|
||||||
self.assertAlmostEqual(motif.counts["G", 0], 0.0)
|
self.assertAlmostEqual(motif.counts["G", 0], 0.0)
|
||||||
self.assertAlmostEqual(motif.counts["G", 1], 1.0)
|
self.assertAlmostEqual(motif.counts["G", 1], 1.0)
|
||||||
self.assertAlmostEqual(motif.counts["G", 2], 0.0)
|
self.assertAlmostEqual(motif.counts["G", 2], 0.0)
|
||||||
@ -1705,6 +1707,8 @@ class TestClusterBuster(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
self.assertEqual(motif[1:-2].consensus, "GCG")
|
self.assertEqual(motif[1:-2].consensus, "GCG")
|
||||||
self.assertEqual(motif.length, 6)
|
self.assertEqual(motif.length, 6)
|
||||||
|
self.assertIsNone(motif.weight)
|
||||||
|
self.assertIsNone(motif.gap)
|
||||||
self.assertAlmostEqual(motif.counts["G", 0], 2.0)
|
self.assertAlmostEqual(motif.counts["G", 0], 2.0)
|
||||||
self.assertAlmostEqual(motif.counts["G", 1], 23.0)
|
self.assertAlmostEqual(motif.counts["G", 1], 23.0)
|
||||||
self.assertAlmostEqual(motif.counts["G", 2], 0.0)
|
self.assertAlmostEqual(motif.counts["G", 2], 0.0)
|
||||||
@ -1753,6 +1757,8 @@ class TestClusterBuster(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
self.assertEqual(motif[1:-2].consensus, "AATTA")
|
self.assertEqual(motif[1:-2].consensus, "AATTA")
|
||||||
self.assertEqual(motif.length, 8)
|
self.assertEqual(motif.length, 8)
|
||||||
|
self.assertEqual(motif.weight, 3.0)
|
||||||
|
self.assertEqual(motif.gap, 10.0)
|
||||||
self.assertAlmostEqual(motif.counts["G", 0], 4.0)
|
self.assertAlmostEqual(motif.counts["G", 0], 4.0)
|
||||||
self.assertAlmostEqual(motif.counts["G", 1], 0.0)
|
self.assertAlmostEqual(motif.counts["G", 1], 0.0)
|
||||||
self.assertAlmostEqual(motif.counts["G", 2], 0.0)
|
self.assertAlmostEqual(motif.counts["G", 2], 0.0)
|
||||||
|
Reference in New Issue
Block a user