From bc0d58b4c43152e598b8c4c5ceaba4c31c1c10b7 Mon Sep 17 00:00:00 2001 From: Gert Hulselmans Date: Fri, 9 Aug 2024 16:53:03 +0200 Subject: [PATCH] Add support for reading GAP and WEIGHT parameters from Cluster Buster motif file. --- Bio/motifs/clusterbuster.py | 25 +++++++++++++++++++++---- Tests/motifs/clusterbuster.pfm | 4 +++- Tests/test_motifs.py | 6 ++++++ 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/Bio/motifs/clusterbuster.py b/Bio/motifs/clusterbuster.py index 210bc69b6..3b599359f 100644 --- a/Bio/motifs/clusterbuster.py +++ b/Bio/motifs/clusterbuster.py @@ -23,12 +23,14 @@ class Record(list): def read(handle): """Read motifs in Cluster Buster position frequency matrix format from a file handle. - Cluster Buster motif format: http://zlab.bu.edu/cluster-buster/help/cis-format.html + Cluster Buster motif format: https://bu.wenglab.org/cluster-buster/help/cis-format.html """ motif_nbr = 0 record = Record() nucleotide_counts = {"A": [], "C": [], "G": [], "T": []} motif_name = "" + motif_gap = None + motif_weight = None for line in handle: line = line.strip() @@ -37,13 +39,23 @@ def read(handle): if motif_nbr != 0: motif = motifs.Motif(alphabet="GATC", counts=nucleotide_counts) motif.name = motif_name + motif.gap = motif_gap + motif.weight = motif_weight record.append(motif) motif_name = line[1:].strip() nucleotide_counts = {"A": [], "C": [], "G": [], "T": []} + motif_gap = None + motif_weight = None motif_nbr += 1 else: - if line.startswith("#"): + if line.startswith("# GAP"): + motif_gap = float(line.split()[2]) + continue + elif line.startswith("# WEIGHT"): + motif_weight = float(line.split()[2]) + continue + elif line.startswith("#"): continue matrix_columns = line.split() @@ -58,6 +70,8 @@ def read(handle): motif = motifs.Motif(alphabet="GATC", counts=nucleotide_counts) motif.name = motif_name + motif.gap = motif_gap + motif.weight = motif_weight record.append(motif) return record @@ -67,8 +81,11 @@ def write(motifs): """Return the representation of motifs in Cluster Buster position frequency matrix format.""" lines = [] for m in motifs: - line = f">{m.name}\n" - lines.append(line) + lines.append(f">{m.name}\n") + if m.weight: + lines.append(f"# WEIGHT: {m.weight}\n") + if m.gap: + lines.append(f"# GAP: {m.gap}\n") for ACGT_counts in zip( m.counts["A"], m.counts["C"], m.counts["G"], m.counts["T"] ): diff --git a/Tests/motifs/clusterbuster.pfm b/Tests/motifs/clusterbuster.pfm index ba7ba51e5..265d5ead3 100644 --- a/Tests/motifs/clusterbuster.pfm +++ b/Tests/motifs/clusterbuster.pfm @@ -13,6 +13,8 @@ 0 0 0 24 0 0 24 0 >MA0008.1 +# WEIGHT: 3.0 +# GAP: 10.0 3 13 4 5 21 1 0 3 25 0 0 0 @@ -20,4 +22,4 @@ 0 5 0 20 24 0 1 0 1 0 0 24 -0 0 2 23 \ No newline at end of file +0 0 2 23 diff --git a/Tests/test_motifs.py b/Tests/test_motifs.py index 501bc9f7f..7b5311efc 100644 --- a/Tests/test_motifs.py +++ b/Tests/test_motifs.py @@ -1659,6 +1659,8 @@ class TestClusterBuster(unittest.TestCase): ) self.assertEqual(motif[1:-2].consensus, "ACG") self.assertEqual(motif.length, 6) + self.assertIsNone(motif.weight) + self.assertIsNone(motif.gap) self.assertAlmostEqual(motif.counts["G", 0], 0.0) self.assertAlmostEqual(motif.counts["G", 1], 1.0) self.assertAlmostEqual(motif.counts["G", 2], 0.0) @@ -1705,6 +1707,8 @@ class TestClusterBuster(unittest.TestCase): ) self.assertEqual(motif[1:-2].consensus, "GCG") self.assertEqual(motif.length, 6) + self.assertIsNone(motif.weight) + self.assertIsNone(motif.gap) self.assertAlmostEqual(motif.counts["G", 0], 2.0) self.assertAlmostEqual(motif.counts["G", 1], 23.0) self.assertAlmostEqual(motif.counts["G", 2], 0.0) @@ -1753,6 +1757,8 @@ class TestClusterBuster(unittest.TestCase): ) self.assertEqual(motif[1:-2].consensus, "AATTA") self.assertEqual(motif.length, 8) + self.assertEqual(motif.weight, 3.0) + self.assertEqual(motif.gap, 10.0) self.assertAlmostEqual(motif.counts["G", 0], 4.0) self.assertAlmostEqual(motif.counts["G", 1], 0.0) self.assertAlmostEqual(motif.counts["G", 2], 0.0)