Add support for reading GAP and WEIGHT parameters from Cluster Buster motif file.

This commit is contained in:
Gert Hulselmans
2024-08-09 16:53:03 +02:00
committed by Peter Cock
parent f02217c40e
commit bc0d58b4c4
3 changed files with 30 additions and 5 deletions

View File

@ -23,12 +23,14 @@ class Record(list):
def read(handle):
"""Read motifs in Cluster Buster position frequency matrix format from a file handle.
Cluster Buster motif format: http://zlab.bu.edu/cluster-buster/help/cis-format.html
Cluster Buster motif format: https://bu.wenglab.org/cluster-buster/help/cis-format.html
"""
motif_nbr = 0
record = Record()
nucleotide_counts = {"A": [], "C": [], "G": [], "T": []}
motif_name = ""
motif_gap = None
motif_weight = None
for line in handle:
line = line.strip()
@ -37,13 +39,23 @@ def read(handle):
if motif_nbr != 0:
motif = motifs.Motif(alphabet="GATC", counts=nucleotide_counts)
motif.name = motif_name
motif.gap = motif_gap
motif.weight = motif_weight
record.append(motif)
motif_name = line[1:].strip()
nucleotide_counts = {"A": [], "C": [], "G": [], "T": []}
motif_gap = None
motif_weight = None
motif_nbr += 1
else:
if line.startswith("#"):
if line.startswith("# GAP"):
motif_gap = float(line.split()[2])
continue
elif line.startswith("# WEIGHT"):
motif_weight = float(line.split()[2])
continue
elif line.startswith("#"):
continue
matrix_columns = line.split()
@ -58,6 +70,8 @@ def read(handle):
motif = motifs.Motif(alphabet="GATC", counts=nucleotide_counts)
motif.name = motif_name
motif.gap = motif_gap
motif.weight = motif_weight
record.append(motif)
return record
@ -67,8 +81,11 @@ def write(motifs):
"""Return the representation of motifs in Cluster Buster position frequency matrix format."""
lines = []
for m in motifs:
line = f">{m.name}\n"
lines.append(line)
lines.append(f">{m.name}\n")
if m.weight:
lines.append(f"# WEIGHT: {m.weight}\n")
if m.gap:
lines.append(f"# GAP: {m.gap}\n")
for ACGT_counts in zip(
m.counts["A"], m.counts["C"], m.counts["G"], m.counts["T"]
):

View File

@ -13,6 +13,8 @@
0 0 0 24
0 0 24 0
>MA0008.1
# WEIGHT: 3.0
# GAP: 10.0
3 13 4 5
21 1 0 3
25 0 0 0
@ -20,4 +22,4 @@
0 5 0 20
24 0 1 0
1 0 0 24
0 0 2 23
0 0 2 23

View File

@ -1659,6 +1659,8 @@ class TestClusterBuster(unittest.TestCase):
)
self.assertEqual(motif[1:-2].consensus, "ACG")
self.assertEqual(motif.length, 6)
self.assertIsNone(motif.weight)
self.assertIsNone(motif.gap)
self.assertAlmostEqual(motif.counts["G", 0], 0.0)
self.assertAlmostEqual(motif.counts["G", 1], 1.0)
self.assertAlmostEqual(motif.counts["G", 2], 0.0)
@ -1705,6 +1707,8 @@ class TestClusterBuster(unittest.TestCase):
)
self.assertEqual(motif[1:-2].consensus, "GCG")
self.assertEqual(motif.length, 6)
self.assertIsNone(motif.weight)
self.assertIsNone(motif.gap)
self.assertAlmostEqual(motif.counts["G", 0], 2.0)
self.assertAlmostEqual(motif.counts["G", 1], 23.0)
self.assertAlmostEqual(motif.counts["G", 2], 0.0)
@ -1753,6 +1757,8 @@ class TestClusterBuster(unittest.TestCase):
)
self.assertEqual(motif[1:-2].consensus, "AATTA")
self.assertEqual(motif.length, 8)
self.assertEqual(motif.weight, 3.0)
self.assertEqual(motif.gap, 10.0)
self.assertAlmostEqual(motif.counts["G", 0], 4.0)
self.assertAlmostEqual(motif.counts["G", 1], 0.0)
self.assertAlmostEqual(motif.counts["G", 2], 0.0)